Merge remote-tracking branch 'upstream/master' into memory_rework

This commit is contained in:
wheremyfoodat 2024-11-09 18:29:40 +02:00
commit b89a21444e
233 changed files with 17350 additions and 1024 deletions

293
.github/gles.patch vendored
View file

@ -1,52 +1,3 @@
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
index a11a6ffa..77486a09 100644
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
}
glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0);
}
void RendererGL::updateLightingLUT() {
- gpu.lightingLUTDirty = false;
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
-
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
- u16_lightinglut[i] = value * 65535 / 4095;
- }
-
- glActiveTexture(GL_TEXTURE0 + 3);
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- glActiveTexture(GL_TEXTURE0);
+ // gpu.lightingLUTDirty = false;
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
+
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
+ // u16_lightinglut[i] = value * 65535 / 4095;
+ // }
+
+ // glActiveTexture(GL_TEXTURE0 + 3);
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+ // glActiveTexture(GL_TEXTURE0);
}
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
index 612671c8..1937f711 100644
--- a/src/host_shaders/opengl_display.frag
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
void main() {
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
index f6fa6c55..bb88e278 100644
index 9f07df0b..96a35afa 100644
--- a/src/host_shaders/opengl_fragment_shader.frag
+++ b/src/host_shaders/opengl_fragment_shader.frag
@@ -1,4 +1,5 @@
@ -78,36 +29,29 @@ index f6fa6c55..bb88e278 100644
+#version 300 es
+precision mediump float;
in vec3 v_tangent;
in vec3 v_normal;
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
-uniform sampler1DArray u_tex_lighting_lut;
+// uniform sampler1DArray u_tex_lighting_lut;
in vec4 v_quaternion;
in vec4 v_colour;
@@ -41,8 +42,8 @@ vec3 normal;
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);
uniform uint u_picaRegs[0x200 - 0x48];
bool isSamplerEnabled(uint environment_id, uint lut_id) {
- uint index = 7 * environment_id + lut_id;
- uint arrayIndex = (index >> 5);
+ uint index = 7u * environment_id + lut_id;
+ uint arrayIndex = (index >> 5u);
return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
}
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
#define RR_LUT 6u
@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) {
return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r;
}
float lutLookup(uint lut, uint light, float value) {
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
- if (lut == SP_LUT) lut = light + 8;
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
+ // if (lut == SP_LUT) lut = light + 8;
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
+ return 0.0;
+}
+
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
+// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
+uint bitfieldExtractCompat(uint val, int off, int size) {
+ uint mask = uint((1 << size) - 1);
+ return uint(val >> off) & mask;
}
+}
+
vec3 regToColor(uint reg) {
// Normalization scale to convert from [0...255] to [0.0...1.0]
const float scale = 1.0 / 255.0;
@ -117,89 +61,115 @@ index f6fa6c55..bb88e278 100644
}
// Convert an arbitrary-width floating point literal to an f32
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 view = normalize(v_view);
@@ -201,7 +208,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
// These are the spotlight attenuation LUTs
bit_in_config1 = 8 + int(light_id & 7u);
lut_index = 8u + light_id;
- } else if (lut_id <= 6) {
+ } else if (lut_id <= 6u) {
bit_in_config1 = 16 + int(lut_id);
lut_index = lut_id;
} else {
@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
return 1.0;
}
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
float delta = 1.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
switch (input_id) {
case 0u: {
delta = dot(normal, normalize(half_vector));
@@ -243,9 +250,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
// of GLSL so we do it manually
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
+ int se_x = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 0, 13));
+ int se_y = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 16, 13));
+ int se_z = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_HIGH), 0, 13));
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
}
// 0 = enabled
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
delta = max(delta, 0.0);
} else {
delta = abs(delta);
@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
// Implements the following algorthm: https://mathb.in/26766
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
primary_color = secondary_color = vec4(1.0);
primary_color = secondary_color = vec4(0.0);
return;
}
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
bool error_unimpl = false;
@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
switch (bump_mode) {
@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
uint light_id;
vec3 light_vector;
vec3 half_vector;
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
vec3 light_vector = normalize(vec3(
float light_distance;
vec3 light_position = vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
));
vec3 half_vector;
);
// Positional Light
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
// error_unimpl = true;
half_vector = normalize(normalize(light_vector + v_view) + view);
}
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
light_vector = light_position + v_view;
}
for (int c = 0; c < 7; c++) {
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id);
if (scale_id >= 6u) scale /= 256.0;
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
if (input_id == 0u)
d[c] = dot(normal, half_vector);
else if (input_id == 1u)
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
vec3 spot_light_vector = normalize(vec3(
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
));
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
} else if (input_id == 5u) {
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
}
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
} else {
d[c] = 1.0;
}
}
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
if (lookup_config == 0u) {
d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0;
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // Li dot N
@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
float NdotL = dot(normal, light_vector); // N dot Li
// Two sided diffuse
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
@ -207,19 +177,40 @@ index f6fa6c55..bb88e278 100644
NdotL = max(0.0, NdotL);
else
NdotL = abs(NdotL);
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
float geometric_factor;
- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
if (use_geo_0 || use_geo_1) {
geometric_factor = dot(half_vector, half_vector);
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
}
float distance_attenuation = 1.0;
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
}
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
// Uses parameters from the last light as Fresnel is only applied to the last light
float fresnel_factor;
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
index a25d7a6d..7cf40398 100644
index 057f9a88..dc735ced 100644
--- a/src/host_shaders/opengl_vertex_shader.vert
+++ b/src/host_shaders/opengl_vertex_shader.vert
@@ -1,4 +1,6 @@
@ -230,7 +221,7 @@ index a25d7a6d..7cf40398 100644
layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion;
@@ -20,7 +22,7 @@ out vec2 v_texcoord2;
@@ -18,7 +20,7 @@ out vec2 v_texcoord2;
flat out vec4 v_textureEnvColor[6];
flat out vec4 v_textureEnvBufferColor;
@ -239,7 +230,7 @@ index a25d7a6d..7cf40398 100644
// TEV uniforms
uniform uint u_textureEnvColor[6];
@@ -93,6 +95,6 @@ void main() {
@@ -81,8 +83,8 @@ void main() {
);
// There's also another, always-on clipping plane based on vertex z
@ -247,16 +238,20 @@ index a25d7a6d..7cf40398 100644
- gl_ClipDistance[1] = dot(clipData, a_coords);
+ // gl_ClipDistance[0] = -a_coords.z;
+ // gl_ClipDistance[1] = dot(clipData, a_coords);
v_quaternion = a_quaternion;
}
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
index f368f573..5ead7f63 100644
index 607815fa..cbfcc096 100644
--- a/third_party/opengl/opengl.hpp
+++ b/third_party/opengl/opengl.hpp
@@ -520,21 +520,21 @@ namespace OpenGL {
@@ -602,22 +602,22 @@ namespace OpenGL {
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
static void enableBlend() { glEnable(GL_BLEND); }
static void disableBlend() { glDisable(GL_BLEND); }
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
static void enableDepth() { glEnable(GL_DEPTH_TEST); }
static void disableDepth() { glDisable(GL_DEPTH_TEST); }

View file

@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec"
# Construct the app iconset.
mkdir alber.iconset
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
iconutil --convert icns alber.iconset
# Set up the .app directory

20
.github/mac-bundle.sh vendored
View file

@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec"
# Construct the app iconset.
mkdir alber.iconset
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
iconutil --convert icns alber.iconset
# Set up the .app directory

View file

@ -8,7 +8,7 @@ on:
jobs:
x64:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
strategy:
matrix:
@ -73,7 +73,7 @@ jobs:
./src/pandroid/app/build/outputs/apk/${{ env.BUILD_TYPE }}/app-${{ env.BUILD_TYPE }}.apk
arm64:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
strategy:
matrix:

View file

@ -16,10 +16,10 @@ jobs:
# well on Windows or Mac. You can convert this to a matrix build if you need
# cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive

View file

@ -15,7 +15,7 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -32,18 +32,33 @@ jobs:
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
- name: Upload Hydra core
uses: actions/upload-artifact@v4
with:
name: Windows core
name: Windows Hydra core
path: '${{github.workspace}}/build/${{ env.BUILD_TYPE }}/Alber.dll'
- name: Configure CMake (Again)
run: |
rm -r -fo ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: Windows Libretro core
path: |
${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
MacOS:
runs-on: macos-13
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -61,22 +76,38 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: MacOS core
name: MacOS Hydra core
path: '${{github.workspace}}/build/libAlber.dylib'
- name: Configure CMake (Again)
run: |
rm -rf ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: MacOS Libretro core
path: |
${{github.workspace}}/build/panda3ds_libretro.dylib
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
Linux:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
- name: Install misc packages
run: |
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
- name: Install newer Clang
run: |
@ -98,22 +129,38 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Linux core
name: Linux Hydra core
path: '${{github.workspace}}/build/libAlber.so'
- name: Configure CMake (Again)
run: |
rm -rf ${{github.workspace}}/build
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
- name: Build (Again)
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload Libretro core
uses: actions/upload-artifact@v4
with:
name: Linux Libretro core
path: |
${{github.workspace}}/build/panda3ds_libretro.so
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
Android-x64:
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
- name: Install misc packages
run: |
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
- name: Setup Vulkan SDK
uses: humbletim/setup-vulkan-sdk@v1.2.0
@ -129,7 +176,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload core
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Android core
name: Android Hydra core
path: '${{github.workspace}}/build/libAlber.so'

View file

@ -16,15 +16,15 @@ jobs:
# well on Windows or Mac. You can convert this to a matrix build if you need
# cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-20.04
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
- name: Install misc packages
run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2
run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
- name: Install newer Clang
run: |
@ -33,11 +33,11 @@ jobs:
sudo ./llvm.sh 17
- name: Setup Vulkan SDK
run: |
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
sudo apt update
sudo apt install vulkan-sdk
uses: humbletim/setup-vulkan-sdk@v1.2.0
with:
vulkan-query-version: latest
vulkan-use-cache: true
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
@ -52,7 +52,7 @@ jobs:
run: ./.github/linux-appimage.sh
- name: Upload executable
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Linux executable
path: './Alber-x86_64.AppImage'

View file

@ -16,15 +16,15 @@ jobs:
# well on Windows or Mac. You can convert this to a matrix build if you need
# cross-platform coverage.
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
- name: Install misc packages
run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev
run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libwayland-dev
- name: Install newer Clang
run: |
@ -49,7 +49,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload executable
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Linux executable
path: './build/Alber'

View file

@ -19,7 +19,7 @@ jobs:
runs-on: macos-13
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -52,7 +52,7 @@ jobs:
run: zip -r Alber Alber.app
- name: Upload MacOS App
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: MacOS Alber App Bundle
path: 'Alber.zip'

View file

@ -15,7 +15,7 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -45,7 +45,7 @@ jobs:
windeployqt --dir upload upload/Alber.exe
- name: Upload executable
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Windows executable
path: upload
@ -54,7 +54,7 @@ jobs:
runs-on: macos-13
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -90,23 +90,22 @@ jobs:
run: zip -r Alber Alber.app
- name: Upload MacOS App
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: MacOS Alber App Bundle
path: 'Alber.zip'
Linux:
runs-on: ubuntu-20.04
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
- name: Install misc packages
run: |
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
sudo add-apt-repository -y ppa:savoury1/qt-6-2
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev
sudo apt update
sudo apt install qt6-base-dev qt6-base-private-dev
@ -117,11 +116,11 @@ jobs:
sudo ./llvm.sh 17
- name: Setup Vulkan SDK
run: |
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
sudo apt update
sudo apt install vulkan-sdk
uses: humbletim/setup-vulkan-sdk@v1.2.0
with:
vulkan-query-version: latest
vulkan-use-cache: true
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
- name: Configure CMake
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON
@ -135,7 +134,7 @@ jobs:
./.github/linux-appimage-qt.sh
- name: Upload executable
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Linux executable
path: './Alber-x86_64.AppImage'

View file

@ -19,7 +19,7 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Fetch submodules
run: git submodule update --init --recursive
@ -40,7 +40,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
- name: Upload executable
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: Windows executable
path: './build/${{ env.BUILD_TYPE }}/Alber.exe'

4
.gitignore vendored
View file

@ -64,5 +64,9 @@ fb.bat
*.elf
*.smdh
# Compiled Metal shader files
*.ir
*.metallib
config.toml
CMakeSettings.json

130
.gitlab-ci.yml Normal file
View file

@ -0,0 +1,130 @@
# DESCRIPTION: GitLab CI/CD for libRetro (NOT FOR GitLab-proper)
##############################################################################
################################# BOILERPLATE ################################
##############################################################################
# Core definitions
.core-defs:
variables:
GIT_SUBMODULE_STRATEGY: recursive
CORENAME: panda3ds
CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF
# Inclusion templates, required for the build to work
include:
################################## DESKTOPS ################################
# Linux
- project: 'libretro-infrastructure/ci-templates'
file: '/linux-cmake.yml'
# Windows
- project: 'libretro-infrastructure/ci-templates'
file: '/windows-cmake-mingw.yml'
# MacOS
- project: 'libretro-infrastructure/ci-templates'
file: 'osx-cmake-x86.yml'
# MacOS
- project: 'libretro-infrastructure/ci-templates'
file: 'osx-cmake-arm64.yml'
################################## CELLULAR ################################
# Android
- project: 'libretro-infrastructure/ci-templates'
file: '/android-cmake.yml'
# iOS
- project: 'libretro-infrastructure/ci-templates'
file: '/ios-cmake.yml'
# Stages for building
stages:
- build-prepare
- build-static
- build-shared
##############################################################################
#################################### STAGES ##################################
##############################################################################
#
################################### DESKTOPS #################################
# Linux 64-bit
libretro-build-linux-x64:
image: $CI_SERVER_HOST:5050/libretro-infrastructure/libretro-build-amd64-ubuntu:latest
before_script:
- export NUMPROC=$(($(nproc)/5))
- sudo apt-get update -qy
- sudo apt-get install -qy software-properties-common
- sudo add-apt-repository -y ppa:savoury1/build-tools
- sudo add-apt-repository -y ppa:savoury1/gcc-defaults-12
- sudo apt-get update -qy
- sudo apt-get install -qy cmake gcc-12 g++-12
variables:
CC: /usr/bin/gcc-12
CXX: /usr/bin/g++-12
extends:
- .libretro-linux-cmake-x86_64
- .core-defs
# Windows 64-bit
libretro-build-windows-x64:
extends:
- .libretro-windows-cmake-x86_64
- .core-defs
# MacOS 64-bit
libretro-build-osx-x64:
tags:
- mac-apple-silicon
variables:
CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCRYPTOPP_AMD64=1
extends:
- .libretro-osx-cmake-x86
- .core-defs
# MacOS arm 64-bit
libretro-build-osx-arm64:
tags:
- mac-apple-silicon
extends:
- .libretro-osx-cmake-arm64
- .core-defs
################################### CELLULAR #################################
# Android ARMv7a
#android-armeabi-v7a:
# extends:
# - .libretro-android-cmake-armeabi-v7a
# - .core-defs
# Android ARMv8a
# android-arm64-v8a:
# extends:
# - .libretro-android-cmake-arm64-v8a
# - .core-defs
# Android 64-bit x86
# android-x86_64:
# extends:
# - .libretro-android-cmake-x86_64
# - .core-defs
# Android 32-bit x86
# android-x86:
# extends:
# - .libretro-android-cmake-x86
# - .core-defs
# iOS
# libretro-build-ios-arm64:
# extends:
# - .libretro-ios-cmake-arm64
# - .core-defs
# variables:
# CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DBUILD_PLAY=OFF -DENABLE_AMAZON_S3=off -DBUILD_TESTS=OFF -DCMAKE_TOOLCHAIN_FILE=deps/Dependencies/cmake-ios/ios.cmake -DTARGET_IOS=ON
# LIBNAME: ${CORENAME}_libretro_ios.dylib
################################### CONSOLES #################################

12
.gitmodules vendored
View file

@ -70,3 +70,15 @@
[submodule "third_party/capstone"]
path = third_party/capstone
url = https://github.com/capstone-engine/capstone
[submodule "third_party/hips"]
path = third_party/hips
url = https://github.com/wheremyfoodat/Hips
[submodule "third_party/metal-cpp"]
path = third_party/metal-cpp
url = https://github.com/Panda3DS-emu/metal-cpp
[submodule "third_party/fmt"]
path = third_party/fmt
url = https://github.com/fmtlib/fmt
[submodule "third_party/fdk-aac"]
path = third_party/fdk-aac
url = https://github.com/Panda3DS-emu/fdk-aac/

View file

@ -19,19 +19,37 @@ endif()
project(Alber)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
if(APPLE)
enable_language(OBJC)
endif()
# Enable RC support in order to use resource files for application icons
if(WIN32)
enable_language(RC)
set(APP_RESOURCES docs/img/windows_icon.rc)
endif()
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-invalid-offsetof")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size")
endif()
if(ANDROID)
set(DEFAULT_OPENGL_PROFILE OpenGLES)
else()
set(DEFAULT_OPENGL_PROFILE OpenGL)
endif()
option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON)
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON)
option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON)
option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON)
option(ENABLE_LTO "Enable link-time optimization" OFF)
option(ENABLE_TESTS "Compile unit-tests" OFF)
option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)
@ -39,12 +57,65 @@ option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF
option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON)
option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON)
option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
option(ENABLE_GIT_VERSIONING "Enables querying git for the emulator version" ON)
option(BUILD_HYDRA_CORE "Build a Hydra core" OFF)
option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON)
option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF)
set(OPENGL_PROFILE ${DEFAULT_OPENGL_PROFILE} CACHE STRING "OpenGL profile to use if OpenGL is enabled. Valid values are 'OpenGL' and 'OpenGLES'.")
set_property(CACHE OPENGL_PROFILE PROPERTY STRINGS OpenGL OpenGLES)
if(ENABLE_OPENGL AND (OPENGL_PROFILE STREQUAL "OpenGLES"))
message(STATUS "Building with OpenGLES support")
add_compile_definitions(USING_GLES)
endif()
if(BUILD_HYDRA_CORE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()
if(BUILD_LIBRETRO_CORE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_compile_definitions(__LIBRETRO__)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD)
# Disable stack buffer overflow checks in user builds
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-")
endif()
# Generate versioning files
find_package(Git)
set(PANDA3DS_VERSION "0.8")
if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/version.hpp.in)
file(WRITE ${CMAKE_BINARY_DIR}/include/version.hpp.in "#define PANDA3DS_VERSION \"\${PANDA3DS_VERSION}\"")
endif()
if(GIT_FOUND AND ENABLE_GIT_VERSIONING)
execute_process(
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0
OUTPUT_VARIABLE PANDA3DS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE
)
execute_process(
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags
OUTPUT_VARIABLE git_version_tag OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT PANDA3DS_VERSION STREQUAL git_version_tag)
execute_process(
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --always --abbrev=7
OUTPUT_VARIABLE git_version_rev OUTPUT_STRIP_TRAILING_WHITESPACE
)
set(PANDA3DS_VERSION "${PANDA3DS_VERSION}.${git_version_rev}")
unset(git_version_rev)
endif()
string(REGEX REPLACE "^v" "" PANDA3DS_VERSION "${PANDA3DS_VERSION}")
unset(git_version_tag)
endif()
configure_file(${CMAKE_BINARY_DIR}/include/version.hpp.in ${CMAKE_BINARY_DIR}/include/version.hpp)
include_directories(${CMAKE_BINARY_DIR}/include/)
add_library(AlberCore STATIC)
include_directories(${PROJECT_SOURCE_DIR}/include/)
@ -52,6 +123,7 @@ include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
include_directories(${FMT_INCLUDE_DIR})
include_directories(third_party/boost/)
include_directories(third_party/elfio/)
include_directories(third_party/hips/include/)
include_directories(third_party/imgui/)
include_directories(third_party/dynarmic/src)
include_directories(third_party/cryptopp/)
@ -82,10 +154,13 @@ if (NOT ANDROID)
target_link_libraries(AlberCore PUBLIC SDL2-static)
endif()
add_subdirectory(third_party/fmt)
add_subdirectory(third_party/toml11)
include_directories(${SDL2_INCLUDE_DIR})
include_directories(third_party/toml11)
include_directories(third_party/glm)
include_directories(third_party/renderdoc)
include_directories(third_party/duckstation)
add_subdirectory(third_party/cmrc)
@ -144,6 +219,18 @@ else()
set(HOST_ARM64 FALSE)
endif()
# Enable SSE4.1 if it's not explicitly disabled
# Annoyingly, we can't easily do this if we're using MSVC cause there's no SSE4.1 flag, only SSE4.1
if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DISABLE_SSE4 AND HOST_X64)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
endif()
if(ENABLE_RENDERDOC_API)
find_package(RenderDoc 1.6.0 MODULE REQUIRED)
add_compile_definitions(PANDA3DS_ENABLE_RENDERDOC)
endif()
if(HOST_X64 OR HOST_ARM64)
set(DYNARMIC_TESTS OFF)
#set(DYNARMIC_NO_BUNDLED_FMT ON)
@ -155,6 +242,7 @@ else()
endif()
add_subdirectory(third_party/teakra EXCLUDE_FROM_ALL)
add_subdirectory(third_party/fdk-aac)
set(CAPSTONE_ARCHITECTURE_DEFAULT OFF)
set(CAPSTONE_ARM_SUPPORT ON)
@ -166,7 +254,7 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp
src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp
src/core/memory.cpp src/renderer.cpp src/core/renderer_null/renderer_null.cpp
src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp
src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp
src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp src/renderdoc.cpp
)
set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp)
set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp
@ -187,12 +275,13 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services
src/core/services/act.cpp src/core/services/nfc.cpp src/core/services/dlp_srvr.cpp
src/core/services/ir_user.cpp src/core/services/http.cpp src/core/services/soc.cpp
src/core/services/ssl.cpp src/core/services/news_u.cpp src/core/services/amiibo_device.cpp
src/core/services/csnd.cpp src/core/services/nwm_uds.cpp
src/core/services/csnd.cpp src/core/services/nwm_uds.cpp src/core/services/fonts.cpp
)
set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp
src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp
src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp
src/core/PICA/shader_decompiler.cpp src/core/PICA/draw_acceleration.cpp
)
set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp)
@ -205,7 +294,7 @@ set(APPLET_SOURCE_FILES src/core/applets/applet.cpp src/core/applets/mii_selecto
src/core/applets/error_applet.cpp
)
set(AUDIO_SOURCE_FILES src/core/audio/dsp_core.cpp src/core/audio/null_core.cpp src/core/audio/teakra_core.cpp
src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp
src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp src/core/audio/aac_decoder.cpp
)
set(RENDERER_SW_SOURCE_FILES src/core/renderer_sw/renderer_sw.cpp)
@ -224,7 +313,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp
include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp
include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp
include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp
include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp
@ -235,21 +324,24 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp
include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp
include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp
include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
include/audio/hle_core.hpp include/capstone.hpp
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp
include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp
)
cmrc_add_resource_library(
resources_console_fonts
NAMESPACE ConsoleFonts
WHENCE "src/core/services/fonts/"
"src/core/services/fonts/CitraSharedFontUSRelocated.bin"
"src/core/services/fonts/SharedFontReplacement.bin"
)
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
@ -275,7 +367,6 @@ if(ENABLE_LUAJIT AND NOT ANDROID)
endif()
if(ENABLE_QT_GUI)
include_directories(third_party/duckstation)
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/window_info.cpp third_party/duckstation/gl/context.cpp)
if(APPLE)
@ -308,7 +399,7 @@ if(ENABLE_OPENGL)
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
include/renderer_gl/gl_state.hpp
include/renderer_gl/gl_state.hpp include/renderer_gl/gl_driver.hpp
)
set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp
@ -318,6 +409,8 @@ if(ENABLE_OPENGL)
src/host_shaders/opengl_fragment_shader.frag
)
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/stream_buffer.cpp)
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES})
source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES})
@ -338,8 +431,8 @@ endif()
if(ENABLE_VULKAN)
find_package(
Vulkan 1.3.206 REQUIRED
COMPONENTS glslangValidator
Vulkan REQUIRED
COMPONENTS glslang
)
set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp
@ -382,7 +475,7 @@ if(ENABLE_VULKAN)
add_custom_command(
OUTPUT ${HOST_SHADER_SPIRV}
COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/"
COMMAND Vulkan::glslangValidator ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV}
COMMAND glslang ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV}
DEPENDS ${HOST_SHADER_SOURCE}
)
list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} )
@ -400,14 +493,88 @@ if(ENABLE_VULKAN)
target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk)
endif()
if(ENABLE_METAL AND APPLE)
set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp
include/renderer_mtl/mtl_depth_stencil_cache.hpp
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
include/renderer_mtl/mtl_render_target.hpp
include/renderer_mtl/mtl_texture.hpp
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
include/renderer_mtl/mtl_lut_texture.hpp
include/renderer_mtl/mtl_command_encoder.hpp
include/renderer_mtl/mtl_common.hpp
include/renderer_mtl/pica_to_mtl.hpp
include/renderer_mtl/objc_helper.hpp
)
set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
src/core/renderer_mtl/renderer_mtl.cpp
src/core/renderer_mtl/mtl_texture.cpp
src/core/renderer_mtl/mtl_etc1.cpp
src/core/renderer_mtl/mtl_lut_texture.cpp
src/core/renderer_mtl/objc_helper.mm
src/host_shaders/metal_shaders.metal
src/host_shaders/metal_blit.metal
#src/host_shaders/metal_copy_to_lut_texture.metal
)
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES})
source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES})
set(RENDERER_MTL_HOST_SHADERS_SOURCES)
function (add_metal_shader SHADER)
set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
# TODO: only include sources in debug builds
add_custom_command(
OUTPUT ${SHADER_IR}
COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
DEPENDS ${SHADER_SOURCE}
VERBATIM)
add_custom_command(
OUTPUT ${SHADER_METALLIB}
COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
DEPENDS ${SHADER_IR}
VERBATIM)
set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB})
endfunction()
add_metal_shader(metal_shaders)
add_metal_shader(metal_blit)
#add_metal_shader(metal_copy_to_lut_texture)
add_custom_target(
compile_msl_shaders
DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES}
)
cmrc_add_resource_library(
resources_renderer_mtl
NAMESPACE RendererMTL
WHENCE "src/host_shaders/"
"src/host_shaders/metal_shaders.metallib"
"src/host_shaders/metal_blit.metallib"
#"src/host_shaders/metal_copy_to_lut_texture.metallib"
)
add_dependencies(resources_renderer_mtl compile_msl_shaders)
target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
# TODO: check if all of them are needed
target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
endif()
source_group("Header Files\\Core" FILES ${HEADER_FILES})
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES}
${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES})
target_sources(AlberCore PRIVATE ${ALL_SOURCES})
target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra)
target_link_libraries(AlberCore PUBLIC glad capstone)
target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra fdk-aac)
target_link_libraries(AlberCore PUBLIC glad capstone fmt::fmt)
if(ENABLE_DISCORD_RPC AND NOT ANDROID)
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_DISCORD_RPC=1")
@ -438,7 +605,7 @@ else()
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_FRONTEND_SDL=1")
endif()
if(NOT BUILD_HYDRA_CORE)
if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
add_executable(Alber)
if(ENABLE_QT_GUI)
@ -447,11 +614,16 @@ if(NOT BUILD_HYDRA_CORE)
message(FATAL_ERROR "Qt frontend requires OpenGL")
endif()
option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF)
set(QT_LANGUAGES docs/translations)
set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp
src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp
)
src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp
)
set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp
include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp
include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp
)
source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES})
@ -481,27 +653,47 @@ if(NOT BUILD_HYDRA_CORE)
endif()
endif()
# Generates an en.ts file for translations
# To update the file, use cmake --build --target Alber_lupdate
if(GENERATE_QT_TRANSLATION)
find_package(Qt6 REQUIRED COMPONENTS LinguistTools)
qt_add_lupdate(Alber TS_FILES ${QT_LANGUAGES}/en.ts
SOURCES ${FRONTEND_SOURCE_FILES}
INCLUDE_DIRECTORIES ${FRONTEND_HEADER_FILES}
NO_GLOBAL_TARGET
)
endif()
qt_add_resources(AlberCore "app_images"
PREFIX "/"
FILES
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png
)
else()
set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp)
set(FRONTEND_HEADER_FILES "")
set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp")
endif()
target_link_libraries(Alber PRIVATE AlberCore)
target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES})
target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES} ${APP_RESOURCES})
elseif(BUILD_HYDRA_CORE)
target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1)
include_directories(third_party/hydra_core/include)
add_library(Alber SHARED src/hydra_core.cpp)
target_link_libraries(Alber PUBLIC AlberCore)
elseif(BUILD_LIBRETRO_CORE)
include_directories(third_party/libretro/include)
add_library(panda3ds_libretro SHARED src/libretro_core.cpp)
target_link_libraries(panda3ds_libretro PUBLIC AlberCore)
set_target_properties(panda3ds_libretro PROPERTIES PREFIX "")
endif()
if(ENABLE_LTO OR ENABLE_USER_BUILD)
set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
if (NOT BUILD_LIBRETRO_CORE)
set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
set_target_properties(panda3ds_libretro PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
endif()
if(ENABLE_TESTS)

25
cmake/FindRenderDoc.cmake Normal file
View file

@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
set(RENDERDOC_INCLUDE_DIR third_party/renderdoc)
if (RENDERDOC_INCLUDE_DIR AND EXISTS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h")
file(STRINGS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h" RENDERDOC_VERSION_LINE REGEX "typedef struct RENDERDOC_API")
string(REGEX REPLACE ".*typedef struct RENDERDOC_API_([0-9]+)_([0-9]+)_([0-9]+).*" "\\1.\\2.\\3" RENDERDOC_VERSION "${RENDERDOC_VERSION_LINE}")
unset(RENDERDOC_VERSION_LINE)
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(RenderDoc
REQUIRED_VARS RENDERDOC_INCLUDE_DIR
VERSION_VAR RENDERDOC_VERSION
)
if (RenderDoc_FOUND AND NOT TARGET RenderDoc::API)
add_library(RenderDoc::API INTERFACE IMPORTED)
set_target_properties(RenderDoc::API PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${RENDERDOC_INCLUDE_DIR}"
)
endif()
mark_as_advanced(RENDERDOC_INCLUDE_DIR)

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 212 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

79
docs/3ds/lighting.md Normal file
View file

@ -0,0 +1,79 @@
## Info on the lighting implementation
### Missing shadow attenuation
Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct
their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows.
### Missing bump mapping
Bump mapping also samples a texture unit, most likely doesn't need render to texture however may need better texture sampling
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation.
Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.
### samplerEnabledBitfields
Holds the enabled state of the lighting samples for various PICA configurations
As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
```c
const bool samplerEnabled[9 * 7] = bool[9 * 7](
// D0 D1 SP FR RB RG RR
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
true, true, true, false, true, true, true, // Configuration 4: All except for FR
true, false, true, true, true, true, true, // Configuration 5: All except for D1
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
false, false, false, false, false, false, false, // Configuration 7: Unused
true, true, true, true, true, true, true // Configuration 8: All
);
```
The above has been condensed to two uints for performance reasons.
You can confirm they are the same by running the following:
```c
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
for (int i = 0; i < 9 * 7; i++) {
unsigned arrayIndex = (i >> 5);
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
if (samplerEnabled[i] == b) {
printf("%d: happy\n", i);
} else {
printf("%d: unhappy\n", i);
}
}
```
### lightLutLookup
lut_id is one of these values
0 D0
1 D1
2 SP
3 FR
4 RB
5 RG
6 RR
lut_index on the other hand represents the actual index of the LUT in the texture
u_tex_luts has 24 LUTs for lighting and they are used like so:
0 D0
1 D1
2 is missing because SP uses LUTs 8-15
3 FR
4 RB
5 RG
6 RR
8-15 SP0-7
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
The light environment configuration controls which LUTs are available for use
If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1
If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue.
### Distance attenuation
Distance attenuation is computed differently from the other factors, for example
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE

BIN
docs/img/KirbyAndroid.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 567 KiB

View file

Before

Width:  |  Height:  |  Size: 2 MiB

After

Width:  |  Height:  |  Size: 2 MiB

BIN
docs/img/rpog_icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
docs/img/rsyn_icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

BIN
docs/img/windows_icon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

1
docs/img/windows_icon.rc Normal file
View file

@ -0,0 +1 @@
AlberIcon ICON "windows_icon.ico"

View file

@ -0,0 +1,34 @@
# Software Information
display_name = "Nintendo - 3DS (Panda3DS)"
authors = "Panda3DS Authors (tm)"
supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"
corename = "Panda3DS"
categories = "Emulator"
license = "GPLv3"
permissions = ""
display_version = "Git"
# Hardware Information
manufacturer = "Nintendo"
systemname = "3DS"
systemid = "3ds"
# Libretro Information
database = "Nintendo - Nintendo 3DS"
supports_no_game = "false"
savestate = "true"
savestate_features = "basic"
cheats = "false"
input_descriptors = "true"
memory_descriptors = "false"
libretro_saves = "true"
core_options = "true"
core_options_version = "1.0"
load_subsystem = "false"
hw_render = "true"
required_hw_api = "OpenGL Core >= 4.1"
needs_fullpath = "true"
disk_control = "false"
is_experimental = "true"
description = "Panda3DS !"

View file

@ -0,0 +1,45 @@
#pragma once
#include <array>
#include "helpers.hpp"
namespace PICA {
struct DrawAcceleration {
static constexpr u32 maxAttribCount = 16;
static constexpr u32 maxLoaderCount = 12;
struct AttributeInfo {
u32 offset;
u32 stride;
u8 type;
u8 componentCount;
std::array<float, 4> fixedValue; // For fixed attributes
};
struct Loader {
// Data to upload for this loader
u8* data;
usize size;
};
u8* indexBuffer;
// Minimum and maximum index in the index buffer for a draw call
u16 minimumIndex, maximumIndex;
u32 totalAttribCount;
u32 totalLoaderCount;
u32 enabledAttributeMask;
u32 fixedAttributes;
u32 vertexDataSize;
std::array<AttributeInfo, maxAttribCount> attributeInfo;
std::array<Loader, maxLoaderCount> loaders;
bool canBeAccelerated;
bool indexed;
bool useShortIndices;
};
} // namespace PICA

View file

@ -2,7 +2,7 @@
#include "helpers.hpp"
#include "vertex_loader_rec.hpp"
// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly)
// Common file for our PICA JITs (From PICA shader -> CPU assembly)
namespace Dynapica {
#ifdef PANDA3DS_DYNAPICA_SUPPORTED

View file

@ -22,8 +22,11 @@ class ShaderJIT {
ShaderCache cache;
#endif
bool accurateMul = false;
public:
void setAccurateMul(bool value) { accurateMul = value; }
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
// Call this before starting to process a batch of vertices
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
@ -36,11 +39,11 @@ class ShaderJIT {
static constexpr bool isAvailable() { return true; }
#else
void prepare(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
}
void run(PICAShader& shaderUnit) {
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
}
// Define dummy callback. This should never be called if the shader JIT is not supported

View file

@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
oaknut::Label log2Func, exp2Func;
oaknut::Label emitLog2Func();
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }

View file

@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
Label negateVector;
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
Label onesVector;
// Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3
Label dp3Vector;
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
// Shows whether the loaded shader has any log2 and exp2 instructions
bool codeHasLog2 = false;
bool codeHasExp2 = false;
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
bool useSafeMUL = false;
Xbyak::Label log2Func, exp2Func;
Xbyak::Label emitLog2Func();
Xbyak::Label emitExp2Func();
Xbyak::util::Cpu cpuCaps;
// Emit a PICA200-compliant multiplication that handles "0 * inf = 0"
void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
// Compile all instructions from [current recompiler PC, end)
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
// Compile instruction "instr"
@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
PrologueCallback prologueCb = nullptr;
// Initialize our emitter with "allocSize" bytes of RWX memory
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
cpuCaps = Xbyak::util::Cpu();
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);

View file

@ -1,6 +1,7 @@
#pragma once
#include <array>
#include "PICA/draw_acceleration.hpp"
#include "PICA/dynapica/shader_rec.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_vertex.hpp"
@ -13,6 +14,12 @@
#include "memory.hpp"
#include "renderer.hpp"
enum class ShaderExecMode {
Interpreter, // Interpret shaders on the CPU
JIT, // Recompile shaders to CPU machine code
Hardware, // Recompiler shaders to host shaders and run them on the GPU
};
class GPU {
static constexpr u32 regNum = 0x300;
static constexpr u32 extRegNum = 0x1000;
@ -45,7 +52,7 @@ class GPU {
uint immediateModeVertIndex;
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
template <bool indexed, bool useShaderJIT>
template <bool indexed, ShaderExecMode mode>
void drawArrays();
// Silly method of avoiding linking problems. TODO: Change to something less silly
@ -81,6 +88,7 @@ class GPU {
std::unique_ptr<Renderer> renderer;
PICA::Vertex getImmediateModeVertex();
void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed);
public:
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
// Encoded in PICA native format
@ -92,6 +100,9 @@ class GPU {
// Set to false by the renderer when the lighting_lut is uploaded ot the GPU
bool lightingLUTDirty = false;
bool fogLUTDirty = false;
std::array<uint32_t, 128> fogLUT;
GPU(Memory& mem, EmulatorConfig& config);
void display() { renderer->display(); }
void screenshot(const std::string& name) { renderer->screenshot(name); }
@ -164,7 +175,8 @@ class GPU {
u32 index = paddr - PhysicalAddrs::VRAM;
return (T*)&vram[index];
} else [[unlikely]] {
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr);
return nullptr;
}
}

View file

@ -0,0 +1,257 @@
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
struct OutputConfig {
union {
u32 raw{};
// Merge the enable + compare function into 1 field to avoid duplicate shaders
// enable == off means a CompareFunction of Always
BitField<0, 3, CompareFunction> alphaTestFunction;
BitField<3, 1, u32> depthMapEnable;
BitField<4, 4, LogicOpMode> logicOpMode;
};
};
struct TextureConfig {
u32 texUnitConfig;
u32 texEnvUpdateBuffer;
// There's 6 TEV stages, and each one is configured via 4 word-sized registers
// (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders)
std::array<u32, 4 * 6> tevConfigs;
};
struct FogConfig {
union {
u32 raw{};
BitField<0, 3, FogMode> mode;
BitField<3, 1, u32> flipDepth;
};
};
struct Light {
union {
u16 raw;
BitField<0, 3, u16> num;
BitField<3, 1, u16> directional;
BitField<4, 1, u16> twoSidedDiffuse;
BitField<5, 1, u16> distanceAttenuationEnable;
BitField<6, 1, u16> spotAttenuationEnable;
BitField<7, 1, u16> geometricFactor0;
BitField<8, 1, u16> geometricFactor1;
BitField<9, 1, u16> shadowEnable;
};
};
struct LightingLUTConfig {
union {
u32 raw;
BitField<0, 1, u32> enable;
BitField<1, 1, u32> absInput;
BitField<2, 3, u32> type;
BitField<5, 3, u32> scale;
};
};
struct LightingConfig {
union {
u32 raw{};
BitField<0, 1, u32> enable;
BitField<1, 4, u32> lightNum;
BitField<5, 2, u32> bumpMode;
BitField<7, 2, u32> bumpSelector;
BitField<9, 1, u32> bumpRenorm;
BitField<10, 1, u32> clampHighlights;
BitField<11, 4, u32> config;
BitField<15, 1, u32> enablePrimaryAlpha;
BitField<16, 1, u32> enableSecondaryAlpha;
BitField<17, 1, u32> enableShadow;
BitField<18, 1, u32> shadowPrimary;
BitField<19, 1, u32> shadowSecondary;
BitField<20, 1, u32> shadowInvert;
BitField<21, 1, u32> shadowAlpha;
BitField<22, 2, u32> shadowSelector;
};
std::array<LightingLUTConfig, 7> luts{};
std::array<Light, 8> lights{};
LightingConfig(const std::array<u32, 0x300>& regs) {
// Ignore lighting registers if it's disabled
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
return;
}
const u32 config0 = regs[InternalRegs::LightConfig0];
const u32 config1 = regs[InternalRegs::LightConfig1];
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
enable = 1;
lightNum = totalLightCount;
enableShadow = Helpers::getBit<0>(config0);
if (enableShadow) [[unlikely]] {
shadowPrimary = Helpers::getBit<16>(config0);
shadowSecondary = Helpers::getBit<17>(config0);
shadowInvert = Helpers::getBit<18>(config0);
shadowAlpha = Helpers::getBit<19>(config0);
shadowSelector = Helpers::getBits<24, 2>(config0);
}
enablePrimaryAlpha = Helpers::getBit<2>(config0);
enableSecondaryAlpha = Helpers::getBit<3>(config0);
config = Helpers::getBits<4, 4>(config0);
bumpSelector = Helpers::getBits<22, 2>(config0);
clampHighlights = Helpers::getBit<27>(config0);
bumpMode = Helpers::getBits<28, 2>(config0);
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
for (int i = 0; i < totalLightCount; i++) {
auto& light = lights[i];
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
light.directional = Helpers::getBit<0>(lightConfig);
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
}
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
LightingLUTConfig& sp = luts[spotlightLutIndex];
LightingLUTConfig& fr = luts[Lights::LUT_FR];
LightingLUTConfig& rb = luts[Lights::LUT_RB];
LightingLUTConfig& rg = luts[Lights::LUT_RG];
LightingLUTConfig& rr = luts[Lights::LUT_RR];
d0.enable = Helpers::getBit<16>(config1) == 0;
d1.enable = Helpers::getBit<17>(config1) == 0;
fr.enable = Helpers::getBit<19>(config1) == 0;
rb.enable = Helpers::getBit<20>(config1) == 0;
rg.enable = Helpers::getBit<21>(config1) == 0;
rr.enable = Helpers::getBit<22>(config1) == 0;
sp.enable = 1;
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
const u32 lutScale = regs[InternalRegs::LightLUTScale];
if (d0.enable) {
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
d0.type = Helpers::getBits<0, 3>(lutSelect);
d0.scale = Helpers::getBits<0, 3>(lutScale);
}
if (d1.enable) {
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
d1.type = Helpers::getBits<4, 3>(lutSelect);
d1.scale = Helpers::getBits<4, 3>(lutScale);
}
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
sp.type = Helpers::getBits<8, 3>(lutSelect);
sp.scale = Helpers::getBits<8, 3>(lutScale);
if (fr.enable) {
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
fr.type = Helpers::getBits<12, 3>(lutSelect);
fr.scale = Helpers::getBits<12, 3>(lutScale);
}
if (rb.enable) {
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
rb.type = Helpers::getBits<16, 3>(lutSelect);
rb.scale = Helpers::getBits<16, 3>(lutScale);
}
if (rg.enable) {
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
rg.type = Helpers::getBits<20, 3>(lutSelect);
rg.scale = Helpers::getBits<20, 3>(lutScale);
}
if (rr.enable) {
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
rr.type = Helpers::getBits<24, 3>(lutSelect);
rr.scale = Helpers::getBits<24, 3>(lutScale);
}
}
};
// Config used for identifying unique fragment pipeline configurations
struct FragmentConfig {
OutputConfig outConfig;
TextureConfig texConfig;
FogConfig fogConfig;
LightingConfig lighting;
bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction =
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
// Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead
const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0;
outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp]));
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
#define setupTevStage(stage) \
std::memcpy(&texConfig.tevConfigs[stage * 4], &regs[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
setupTevStage(0);
setupTevStage(1);
setupTevStage(2);
setupTevStage(3);
setupTevStage(4);
setupTevStage(5);
#undef setupTevStage
fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]);
if (fogConfig.mode == FogMode::Fog) {
fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]);
}
}
};
static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FogConfig>() && std::has_unique_object_representations<Light>()
);
} // namespace PICA
// Override std::hash for our fragment config class
template <>
struct std::hash<PICA::FragmentConfig> {
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
};

View file

@ -0,0 +1,47 @@
#pragma once
#include <array>
#include <cstddef>
#include <type_traits>
#include "helpers.hpp"
namespace PICA {
struct LightUniform {
using vec3 = std::array<float, 3>;
// std140 requires vec3s be aligned to 16 bytes
alignas(16) vec3 specular0;
alignas(16) vec3 specular1;
alignas(16) vec3 diffuse;
alignas(16) vec3 ambient;
alignas(16) vec3 position;
alignas(16) vec3 spotlightDirection;
float distanceAttenuationBias;
float distanceAttenuationScale;
};
struct FragmentUniforms {
using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>;
static constexpr usize tevStageCount = 6;
s32 alphaReference;
float depthScale;
float depthOffset;
alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords;
// Note: We upload these as a u32 and decode on GPU.
// Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU.
u32 globalAmbientLight;
u32 fogColor;
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
LightUniform lightUniforms[8];
};
// Assert that lightUniforms is the last member of the structure
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
} // namespace PICA

274
include/PICA/pica_simd.hpp Normal file
View file

@ -0,0 +1,274 @@
#pragma once
#include <algorithm>
#include <limits>
#include <utility>
#include "helpers.hpp"
#if defined(_M_AMD64) || defined(__x86_64__)
#define PICA_SIMD_X64
#include <immintrin.h>
#elif defined(_M_ARM64) || defined(__aarch64__)
#define PICA_SIMD_ARM64
#include <arm_neon.h>
#endif
// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them)
namespace PICA::IndexBuffer {
// Non-SIMD, portable algorithm
template <bool useShortIndices>
std::pair<u16, u16> analyzePortable(u8* indexBuffer, u32 vertexCount) {
u16 minimumIndex = std::numeric_limits<u16>::max();
u16 maximumIndex = 0;
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
if constexpr (useShortIndices) {
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
for (u32 i = 0; i < vertexCount; i++) {
u16 index = indexBuffer16[i];
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
} else {
for (u32 i = 0; i < vertexCount; i++) {
u16 index = u16(indexBuffer[i]);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
}
return {minimumIndex, maximumIndex};
}
#ifdef PICA_SIMD_ARM64
template <bool useShortIndices>
std::pair<u16, u16> analyzeNEON(u8* indexBuffer, u32 vertexCount) {
// We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
if (vertexCount < vertsPerLoop) {
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
}
u16 minimumIndex, maximumIndex;
if constexpr (useShortIndices) {
// 16-bit indices
uint16x8_t minima = vdupq_n_u16(0xffff);
uint16x8_t maxima = vdupq_n_u16(0);
while (vertexCount >= vertsPerLoop) {
const uint16x8_t data = vld1q_u16(reinterpret_cast<u16*>(indexBuffer));
minima = vminq_u16(data, minima);
maxima = vmaxq_u16(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
// Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD
// We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors
// uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima));
// uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima));
uint16x8_t foldedMinima1 = vpminq_u16(minima, minima);
uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1);
uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2);
uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima);
uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1);
uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2);
minimumIndex = vgetq_lane_u16(foldedMinima3, 0);
maximumIndex = vgetq_lane_u16(foldedMaxima3, 0);
} else {
// 8-bit indices
uint8x16_t minima = vdupq_n_u8(0xff);
uint8x16_t maxima = vdupq_n_u8(0);
while (vertexCount >= vertsPerLoop) {
uint8x16_t data = vld1q_u8(indexBuffer);
minima = vminq_u8(data, minima);
maxima = vmaxq_u8(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
// Do a similar horizontal min/max as in the u16 case, except now we're working uint8x16 instead of uint16x4 so we need 4 folds
uint8x16_t foldedMinima1 = vpminq_u8(minima, minima);
uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1);
uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2);
uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3);
uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima);
uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1);
uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2);
uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3);
minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0));
maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0));
}
// If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
while (vertexCount > 0) {
if constexpr (useShortIndices) {
u16 index = *reinterpret_cast<u16*>(indexBuffer);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
indexBuffer += 2;
} else {
u16 index = u16(*indexBuffer++);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
vertexCount -= 1;
}
return {minimumIndex, maximumIndex};
}
#endif
#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
template <bool useShortIndices>
std::pair<u16, u16> analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) {
// We process 16 bytes per iteration, which is 8 vertices if we're using u16
// indices or 16 vertices if we're using u8 indices
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
if (vertexCount < vertsPerLoop) {
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
}
u16 minimumIndex, maximumIndex;
if constexpr (useShortIndices) {
// Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers.
// Based on https://stackoverflow.com/a/22259607
auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); };
auto horizontalMax16 = [](__m128i vector) -> u16 {
// We have an instruction to compute horizontal minimum but not maximum, so we use it.
// To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum
__m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu));
u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped)));
return u16(min ^ 0xffff);
};
// 16-bit indices
// Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane)
// And the maxima vector to all 0s (0 for each 16-bit lane)
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
while (vertexCount >= vertsPerLoop) {
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
minima = _mm_min_epu16(data, minima);
maxima = _mm_max_epu16(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
minimumIndex = u16(horizontalMin16(minima));
maximumIndex = u16(horizontalMax16(maxima));
} else {
// Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers.
// Based on https://stackoverflow.com/a/22259607
auto horizontalMin8 = [](__m128i vector) -> u8 {
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8));
return u8(_mm_cvtsi128_si32(vector));
};
auto horizontalMax8 = [](__m128i vector) -> u8 {
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8));
return u8(_mm_cvtsi128_si32(vector));
};
// 8-bit indices
// Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane)
// And the maxima vector to all 0s (0 for each 8-bit lane)
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
while (vertexCount >= vertsPerLoop) {
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
minima = _mm_min_epu8(data, minima);
maxima = _mm_max_epu8(data, maxima);
indexBuffer += 16;
vertexCount -= vertsPerLoop;
}
minimumIndex = u16(horizontalMin8(minima));
maximumIndex = u16(horizontalMax8(maxima));
}
// If any indices could not be processed cause the buffer size
// is not 16-byte aligned, process them the naive way
// Calculate the minimum and maximum indices used in the index
// buffer, so we'll only upload them
while (vertexCount > 0) {
if constexpr (useShortIndices) {
u16 index = *reinterpret_cast<u16*>(indexBuffer);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
indexBuffer += 2;
} else {
u16 index = u16(*indexBuffer++);
minimumIndex = std::min(minimumIndex, index);
maximumIndex = std::max(maximumIndex, index);
}
vertexCount -= 1;
}
return {minimumIndex, maximumIndex};
}
#endif
// Analyzes a PICA index buffer to get the minimum and maximum indices in the
// buffer, and returns them in a pair in the form [min, max]. Takes a template
// parameter to decide whether the indices in the buffer are u8 or u16
template <bool useShortIndices>
std::pair<u16, u16> analyze(u8* indexBuffer, u32 vertexCount) {
#if defined(PICA_SIMD_ARM64)
return analyzeNEON<useShortIndices>(indexBuffer, vertexCount);
#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
// Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX
return analyzeSSE4_1<useShortIndices>(indexBuffer, vertexCount);
#else
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
#endif
}
// In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex
// So we need to subtract the base vertex index from every index in the index buffer ourselves
// This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio
template <bool useShortIndices>
void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) {
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
if constexpr (useShortIndices) {
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
for (u32 i = 0; i < indexCount; i++) {
indexBuffer16[i] -= baseIndex;
}
} else {
u8 baseIndex8 = u8(baseIndex);
for (u32 i = 0; i < indexCount; i++) {
indexBuffer[i] -= baseIndex8;
}
}
}
} // namespace PICA::IndexBuffer

View file

@ -0,0 +1,57 @@
#pragma once
#include <array>
#include <cassert>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
// Configuration struct used
struct VertConfig {
PICAHash::HashType shaderHash;
PICAHash::HashType opdescHash;
u32 entrypoint;
// PICA registers for configuring shader output->fragment semantic mapping
std::array<u32, 7> outmaps{};
u16 outputMask;
u8 outputCount;
bool usingUbershader;
// Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup
// As the padding will get hashed and memcmp'd...
u32 pad{};
bool operator==(const VertConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
}
VertConfig(PICAShader& shader, const std::array<u32, 0x300>& regs, bool usingUbershader) : usingUbershader(usingUbershader) {
shaderHash = shader.getCodeHash();
opdescHash = shader.getOpdescHash();
entrypoint = shader.entrypoint;
outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask];
for (int i = 0; i < outputCount; i++) {
// Mask out unused bits
outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F;
}
}
};
} // namespace PICA
static_assert(sizeof(PICA::VertConfig) == 56);
// Override std::hash for our vertex config class
template <>
struct std::hash<PICA::VertConfig> {
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
};

View file

@ -51,6 +51,18 @@ namespace PICA {
#undef defineTexEnv
// clang-format on
// Fog registers
FogColor = 0xE1,
FogLUTIndex = 0xE6,
FogLUTData0 = 0xE8,
FogLUTData1 = 0xE9,
FogLUTData2 = 0xEA,
FogLUTData3 = 0xEB,
FogLUTData4 = 0xEC,
FogLUTData5 = 0xED,
FogLUTData6 = 0xEE,
FogLUTData7 = 0xEF,
// Framebuffer registers
ColourOperation = 0x100,
BlendFunc = 0x101,
@ -67,7 +79,29 @@ namespace PICA {
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
//LightingRegs
// Lighting registers
LightingEnable = 0x8F,
Light0Specular0 = 0x140,
Light0Specular1 = 0x141,
Light0Diffuse = 0x142,
Light0Ambient = 0x143,
Light0XY = 0x144,
Light0Z = 0x145,
Light0SpotlightXY = 0x146,
Light0SpotlightZ = 0x147,
Light0Config = 0x149,
Light0AttenuationBias = 0x14A,
Light0AttenuationScale = 0x14B,
LightGlobalAmbient = 0x1C0,
LightNumber = 0x1C2,
LightConfig0 = 0x1C3,
LightConfig1 = 0x1C4,
LightPermutation = 0x1D9,
LightLUTAbs = 0x1D0,
LightLUTSelect = 0x1D1,
LightLUTScale = 0x1D2,
LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9,
@ -231,7 +265,8 @@ namespace PICA {
enum : u32 {
LUT_D0 = 0,
LUT_D1,
LUT_FR,
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
LUT_FR = 0x3,
LUT_RB,
LUT_RG,
LUT_RR,
@ -255,6 +290,11 @@ namespace PICA {
};
}
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
// This is particularly intuitive in several places, such as checking if a LUT is enabled
static constexpr int spotlightLutIndex = 2;
enum class TextureFmt : u32 {
RGBA8 = 0x0,
RGB8 = 0x1,
@ -345,4 +385,156 @@ namespace PICA {
GeometryPrimitive = 3,
};
enum class CompareFunction : u32 {
Never = 0,
Always = 1,
Equal = 2,
NotEqual = 3,
Less = 4,
LessOrEqual = 5,
Greater = 6,
GreaterOrEqual = 7,
};
enum class LogicOpMode : u32 {
Clear = 0,
And = 1,
ReverseAnd = 2,
Copy = 3,
Set = 4,
InvertedCopy = 5,
Nop = 6,
Invert = 7,
Nand = 8,
Or = 9,
Nor = 10,
Xor = 11,
Equiv = 12,
InvertedAnd = 13,
ReverseOr = 14,
InvertedOr = 15,
};
enum class FogMode : u32 {
Disabled = 0,
Fog = 5,
Gas = 7,
};
struct TexEnvConfig {
enum class Source : u8 {
PrimaryColor = 0x0,
PrimaryFragmentColor = 0x1,
SecondaryFragmentColor = 0x2,
Texture0 = 0x3,
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
// TODO: Inbetween values are unknown
PreviousBuffer = 0xD,
Constant = 0xE,
Previous = 0xF,
};
enum class ColorOperand : u8 {
SourceColor = 0x0,
OneMinusSourceColor = 0x1,
SourceAlpha = 0x2,
OneMinusSourceAlpha = 0x3,
SourceRed = 0x4,
OneMinusSourceRed = 0x5,
// TODO: Inbetween values are unknown
SourceGreen = 0x8,
OneMinusSourceGreen = 0x9,
// Inbetween values are unknown
SourceBlue = 0xC,
OneMinusSourceBlue = 0xD,
};
enum class AlphaOperand : u8 {
SourceAlpha = 0x0,
OneMinusSourceAlpha = 0x1,
SourceRed = 0x2,
OneMinusSourceRed = 0x3,
SourceGreen = 0x4,
OneMinusSourceGreen = 0x5,
SourceBlue = 0x6,
OneMinusSourceBlue = 0x7,
};
enum class Operation : u8 {
Replace = 0,
Modulate = 1,
Add = 2,
AddSigned = 3,
Lerp = 4,
Subtract = 5,
Dot3RGB = 6,
Dot3RGBA = 7,
MultiplyAdd = 8,
AddMultiply = 9,
};
// RGB sources
Source colorSource1, colorSource2, colorSource3;
// Alpha sources
Source alphaSource1, alphaSource2, alphaSource3;
// RGB operands
ColorOperand colorOperand1, colorOperand2, colorOperand3;
// Alpha operands
AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
// Texture environment operations for this stage
Operation colorOp, alphaOp;
u32 constColor;
private:
// These are the only private members since their value doesn't actually reflect the scale
// So we make them public so we'll always use the appropriate member functions instead
u8 colorScale;
u8 alphaScale;
public:
// Create texture environment object from TEV registers
TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
colorSource1 = Helpers::getBits<0, 4, Source>(source);
colorSource2 = Helpers::getBits<4, 4, Source>(source);
colorSource3 = Helpers::getBits<8, 4, Source>(source);
alphaSource1 = Helpers::getBits<16, 4, Source>(source);
alphaSource2 = Helpers::getBits<20, 4, Source>(source);
alphaSource3 = Helpers::getBits<24, 4, Source>(source);
colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
colorOp = Helpers::getBits<0, 4, Operation>(combiner);
alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
colorScale = Helpers::getBits<0, 2>(scale);
alphaScale = Helpers::getBits<16, 2>(scale);
}
u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; }
u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }
bool isPassthroughStage() {
// clang-format off
// Thank you to the Citra dev that wrote this out
return (
colorOp == Operation::Replace && alphaOp == Operation::Replace &&
colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
getColorScale() == 1 && getAlphaScale() == 1
);
// clang-format on
}
};
} // namespace PICA

View file

@ -1,6 +1,8 @@
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cstddef>
#include <cstring>
#include "PICA/float_types.hpp"
@ -21,7 +23,7 @@ namespace ShaderOpcodes {
DST = 0x04,
EX2 = 0x05,
LG2 = 0x06,
LIT = 0x07,
LITP = 0x07,
MUL = 0x08,
SGE = 0x09,
SLT = 0x0A,
@ -56,6 +58,10 @@ namespace ShaderOpcodes {
};
}
namespace PICA::ShaderGen {
class ShaderDecompiler;
};
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
class PICAShader {
using f24 = Floats::f24;
@ -90,14 +96,22 @@ class PICAShader {
public:
// These are placed close to the temp registers and co because it helps the JIT generate better code
u32 entrypoint = 0; // Initial shader PC
u32 boolUniform;
std::array<std::array<u8, 4>, 4> intUniforms;
// We want these registers in this order & with this alignment for uploading them directly to a UBO
// When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers.
alignas(16) std::array<vec4f, 96> floatUniforms;
alignas(16) std::array<std::array<u8, 4>, 4> intUniforms;
u32 boolUniform;
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
alignas(16) std::array<vec4f, 16> outputs;
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
using Hash = PICAHash::HashType;
protected:
std::array<u32, 128> operandDescriptors;
@ -116,20 +130,20 @@ class PICAShader {
std::array<CallInfo, 4> callInfo;
ShaderType type;
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
using Hash = PICAHash::HashType;
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
public:
bool uniformsDirty = false;
protected:
bool codeHashDirty = false;
bool opdescHashDirty = false;
// Add these as friend classes for the JIT so it has access to all important state
friend class ShaderJIT;
friend class ShaderEmitter;
friend class PICA::ShaderGen::ShaderDecompiler;
vec4f getSource(u32 source);
vec4f& getDest(u32 dest);
@ -151,6 +165,7 @@ class PICAShader {
void jmpc(u32 instruction);
void jmpu(u32 instruction);
void lg2(u32 instruction);
void litp(u32 instruction);
void loop(u32 instruction);
void mad(u32 instruction);
void madi(u32 instruction);
@ -220,13 +235,9 @@ class PICAShader {
public:
static constexpr size_t maxInstructionCount = 4096;
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
PICAShader(ShaderType type) : type(type) {}
// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
@ -235,7 +246,7 @@ class PICAShader {
Helpers::panic("o no, shader upload overflew");
}
bufferedShader[bufferIndex++] = word;
loadedShader[bufferIndex++] = word;
bufferIndex &= 0xfff;
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
@ -256,14 +267,16 @@ class PICAShader {
void uploadFloatUniform(u32 word) {
floatUniformBuffer[floatUniformWordCount++] = word;
if (floatUniformIndex >= 96) {
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
}
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
vec4f& uniform = floatUniforms[floatUniformIndex++];
floatUniformWordCount = 0;
// Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case
if (floatUniformIndex >= 96) [[unlikely]] {
return;
}
vec4f& uniform = floatUniforms[floatUniformIndex++];
if (f32UniformTransfer) {
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
@ -275,6 +288,7 @@ class PICAShader {
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
}
uniformsDirty = true;
}
}
@ -286,6 +300,12 @@ class PICAShader {
u[1] = getBits<8, 8>(word);
u[2] = getBits<16, 8>(word);
u[3] = getBits<24, 8>(word);
uniformsDirty = true;
}
void uploadBoolUniform(u32 value) {
boolUniform = value;
uniformsDirty = true;
}
void run();
@ -293,4 +313,13 @@ class PICAShader {
Hash getCodeHash();
Hash getOpdescHash();
};
// Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU.
static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); }
void* getUniformPointer() { return static_cast<void*>(&floatUniforms); }
};
static_assert(
offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 &&
offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4
);

View file

@ -0,0 +1,131 @@
#pragma once
#include <fmt/format.h>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "PICA/shader.hpp"
#include "PICA/shader_gen_types.hpp"
struct EmulatorConfig;
namespace PICA::ShaderGen {
// Control flow analysis is partially based on
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
struct ControlFlow {
// A continuous range of addresses
struct AddressRange {
u32 start, end;
AddressRange(u32 start, u32 end) : start(start), end(end) {}
// Use lexicographic comparison for functions in order to sort them in a set
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
};
struct Function {
using Labels = std::set<u32>;
enum class ExitMode {
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
AlwaysReturn, // All paths reach the return point.
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
AlwaysEnd, // All paths reach an END instruction.
};
u32 start; // Starting PC of the function
u32 end; // End PC of the function
Labels outLabels{}; // Labels this function can "goto" (jump) to
ExitMode exitMode = ExitMode::Unknown;
explicit Function(u32 start, u32 end) : start(start), end(end) {}
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
// from within functions deep in the callstack
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
};
std::set<Function> functions{};
std::map<AddressRange, Function::ExitMode> exitMap{};
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
// On the CPU
bool analysisFailed = false;
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
auto searchIterator = functions.find(Function(start, end));
if (searchIterator != functions.end()) {
return &(*searchIterator);
}
// Add this function and analyze it if it doesn't already exist
Function function(start, end);
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
// This function could not be fully analyzed, report failure
if (function.exitMode == Function::ExitMode::Unknown) {
analysisFailed = true;
return nullptr;
}
// Add function to our function list
auto [it, added] = functions.insert(std::move(function));
return &(*it);
}
void analyze(const PICAShader& shader, u32 entrypoint);
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
};
class ShaderDecompiler {
using AddressRange = ControlFlow::AddressRange;
using Function = ControlFlow::Function;
ControlFlow controlFlow{};
PICAShader& shader;
EmulatorConfig& config;
std::string decompiledShader;
u32 entrypoint;
API api;
Language language;
bool compilationError = false;
void compileInstruction(u32& pc, bool& finished);
// Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction)
std::pair<u32, bool> compileRange(const AddressRange& range);
void callFunction(const Function& function);
const Function* findFunction(const AddressRange& range);
void writeAttributes();
std::string getSource(u32 source, u32 index) const;
std::string getDest(u32 dest) const;
std::string getSwizzlePattern(u32 swizzle) const;
std::string getDestSwizzle(u32 destinationMask) const;
const char* getCondition(u32 cond, u32 refX, u32 refY);
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
// Returns if the instruction uses the typical register encodings most instructions use
// With some exceptions like MAD/MADI, and the control flow instructions which are completely different
bool usesCommonEncoding(u32 instruction) const;
public:
ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
: shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {}
std::string decompile();
};
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
} // namespace PICA::ShaderGen

View file

@ -0,0 +1,43 @@
#pragma once
#include <string>
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_vert_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen_types.hpp"
#include "helpers.hpp"
namespace PICA::ShaderGen {
class FragmentGenerator {
API api;
Language language;
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
bool isSamplerEnabled(u32 environmentID, u32 lutID);
void compileFog(std::string& shader, const PICA::FragmentConfig& config);
void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config);
public:
FragmentGenerator(API api, Language language) : api(api), language(language) {}
std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr);
std::string getDefaultVertexShader();
// For when PICA shader is acceleration is enabled. Turn the PICA shader source into a proper vertex shader
std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader);
void setTarget(API api, Language language) {
this->api = api;
this->language = language;
}
};
}; // namespace PICA::ShaderGen

View file

@ -0,0 +1,9 @@
#pragma once
namespace PICA::ShaderGen {
// Graphics API this shader is targetting
enum class API { GL, GLES, Vulkan };
// Shading language to use (Only GLSL for the time being)
enum class Language { GLSL };
} // namespace PICA::ShaderGen

View file

@ -2,10 +2,9 @@
#include "PICA/shader.hpp"
class ShaderUnit {
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
public:
PICAShader vs; // Vertex shader
PICAShader gs; // Geometry shader
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
void reset();

99
include/align.hpp Normal file
View file

@ -0,0 +1,99 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#pragma once
#include <cstdlib>
#include "helpers.hpp"
#ifdef _WIN32
#include <malloc.h>
#endif
namespace Common {
template <typename T>
constexpr bool isAligned(T value, unsigned int alignment) {
return (value % static_cast<T>(alignment)) == 0;
}
template <typename T>
constexpr T alignUp(T value, unsigned int alignment) {
return (value + static_cast<T>(alignment - 1)) / static_cast<T>(alignment) * static_cast<T>(alignment);
}
template <typename T>
constexpr T alignDown(T value, unsigned int alignment) {
return value / static_cast<T>(alignment) * static_cast<T>(alignment);
}
template <typename T>
constexpr bool isAlignedPow2(T value, unsigned int alignment) {
return (value & static_cast<T>(alignment - 1)) == 0;
}
template <typename T>
constexpr T alignUpPow2(T value, unsigned int alignment) {
return (value + static_cast<T>(alignment - 1)) & static_cast<T>(~static_cast<T>(alignment - 1));
}
template <typename T>
constexpr T alignDownPow2(T value, unsigned int alignment) {
return value & static_cast<T>(~static_cast<T>(alignment - 1));
}
template <typename T>
constexpr bool isPow2(T value) {
return (value & (value - 1)) == 0;
}
template <typename T>
constexpr T previousPow2(T value) {
if (value == static_cast<T>(0)) return 0;
value |= (value >> 1);
value |= (value >> 2);
value |= (value >> 4);
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
return value - (value >> 1);
}
template <typename T>
constexpr T nextPow2(T value) {
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
if (value == static_cast<T>(0)) return 0;
value--;
value |= (value >> 1);
value |= (value >> 2);
value |= (value >> 4);
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
value++;
return value;
}
ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) {
#ifdef _WIN32
return _aligned_malloc(size, alignment);
#else
// Unaligned sizes are slow on macOS.
#ifdef __APPLE__
if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1);
#endif
void* ret = nullptr;
return (posix_memalign(&ret, alignment, size) == 0) ? ret : nullptr;
#endif
}
ALWAYS_INLINE static void alignedFree(void* ptr) {
#ifdef _MSC_VER
_aligned_free(ptr);
#else
free(ptr);
#endif
}
} // namespace Common

82
include/audio/aac.hpp Normal file
View file

@ -0,0 +1,82 @@
#pragma once
#include <array>
#include <type_traits>
#include "helpers.hpp"
#include "swap.hpp"
namespace Audio::AAC {
namespace ResultCode {
enum : u32 {
Success = 0,
};
}
// Enum values and struct definitions based off Citra
namespace Command {
enum : u16 {
Init = 0, // Initialize encoder/decoder
EncodeDecode = 1, // Encode/Decode AAC
Shutdown = 2, // Shutdown encoder/decoder
LoadState = 3,
SaveState = 4,
};
}
namespace SampleRate {
enum : u32 {
Rate48000 = 0,
Rate44100 = 1,
Rate32000 = 2,
Rate24000 = 3,
Rate22050 = 4,
Rate16000 = 5,
Rate12000 = 6,
Rate11025 = 7,
Rate8000 = 8,
};
}
namespace Mode {
enum : u16 {
None = 0,
Decode = 1,
Encode = 2,
};
}
struct DecodeResponse {
u32_le sampleRate;
u32_le channelCount;
u32_le size;
u32_le unknown1;
u32_le unknown2;
u32_le sampleCount;
};
struct DecodeRequest {
u32_le address; // Address of input AAC stream
u32_le size; // Size of input AAC stream
u32_le destAddrLeft; // Output address for left channel samples
u32_le destAddrRight; // Output address for right channel samples
u32_le unknown1;
u32_le unknown2;
};
struct Message {
u16_le mode = Mode::None; // Encode or decode AAC?
u16_le command = Command::Init;
u32_le resultCode = ResultCode::Success;
// Info on the AAC request
union {
std::array<u8, 24> commandData{};
DecodeResponse decodeResponse;
DecodeRequest decodeRequest;
};
};
static_assert(sizeof(Message) == 32);
static_assert(std::is_trivially_copyable<Message>());
} // namespace Audio::AAC

View file

@ -0,0 +1,24 @@
#pragma once
#include <functional>
#include "audio/aac.hpp"
#include "helpers.hpp"
struct AAC_DECODER_INSTANCE;
namespace Audio::AAC {
class Decoder {
using DecoderHandle = AAC_DECODER_INSTANCE*;
using PaddrCallback = std::function<u8*(u32)>;
DecoderHandle decoderHandle = nullptr;
bool isInitialized() { return decoderHandle != nullptr; }
void initialize();
public:
// Decode function. Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions
void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback);
~Decoder();
};
} // namespace Audio::AAC

View file

@ -43,7 +43,7 @@ namespace Audio {
virtual ~DSPCore() {}
virtual void reset() = 0;
virtual void runAudioFrame() = 0;
virtual void runAudioFrame(u64 eventTimestamp) = 0;
virtual u8* getDspMemory() = 0;
virtual u16 recvData(u32 regId) = 0;

View file

@ -294,12 +294,12 @@ namespace Audio::HLE {
struct SourceStatus {
struct Status {
u8 isEnabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
u8 enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
u8 currentBufferIDDirty; ///< Non-zero when current_buffer_id changes
u16_le syncCount; ///< Is set by the DSP to the value of SourceConfiguration::sync_count
u32_dsp bufferPosition; ///< Number of samples into the current buffer
u32_dsp samplePosition; ///< Number of samples into the current buffer
u16_le currentBufferID; ///< Updated when a buffer finishes playing
u16_le lastBufferID; ///< Updated when all buffers in the queue finish playing
u16_le previousBufferID; ///< Updated when all buffers in the queue finish playing
};
Status status[sourceCount];
@ -324,8 +324,8 @@ namespace Audio::HLE {
BitField<15, 1, u32> outputBufferCountDirty;
BitField<16, 1, u32> masterVolumeDirty;
BitField<24, 1, u32> auxReturnVolume0Dirty;
BitField<25, 1, u32> auxReturnVolume1Dirty;
BitField<24, 1, u32> auxVolume0Dirty;
BitField<25, 1, u32> auxVolume1Dirty;
BitField<26, 1, u32> outputFormatDirty;
BitField<27, 1, u32> clippingModeDirty;
BitField<28, 1, u32> headphonesConnectedDirty;
@ -337,7 +337,7 @@ namespace Audio::HLE {
/// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for
/// each at the final mixer.
float_le masterVolume;
std::array<float_le, 2> auxReturnVolume;
std::array<float_le, 2> auxVolumes;
u16_le outputBufferCount;
u16 pad1[2];
@ -422,7 +422,7 @@ namespace Audio::HLE {
struct DspStatus {
u16_le unknown;
u16_le dropped_frames;
u16_le droppedFrames;
u16 pad0[0xE];
};
ASSERT_DSP_STRUCT(DspStatus, 32);

View file

@ -2,9 +2,12 @@
#include <array>
#include <cassert>
#include <deque>
#include <memory>
#include <queue>
#include <vector>
#include "audio/aac.hpp"
#include "audio/aac_decoder.hpp"
#include "audio/dsp_core.hpp"
#include "audio/dsp_shared_mem.hpp"
#include "memory.hpp"
@ -41,15 +44,25 @@ namespace Audio {
return this->bufferID > other.bufferID;
}
};
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
using SampleBuffer = std::deque<std::array<s16, 2>>;
using BufferQueue = std::priority_queue<Buffer>;
BufferQueue buffers;
SampleFormat sampleFormat = SampleFormat::ADPCM;
SourceType sourceType = SourceType::Stereo;
std::array<float, 3> gain0, gain1, gain2;
u32 samplePosition; // Sample number into the current audio buffer
float rateMultiplier;
u16 syncCount;
bool enabled; // Is the source enabled?
u16 currentBufferID;
u16 previousBufferID;
bool enabled; // Is the source enabled?
bool isBufferIDDirty = false; // Did we change buffers?
// ADPCM decoding info:
// An array of fixed point S5.11 coefficients. These provide "weights" for the history samples
@ -65,6 +78,10 @@ namespace Audio {
int index = 0; // Index of the voice in [0, 23] for debugging
void reset();
// Push a buffer to the buffer queue
void pushBuffer(const Buffer& buffer) { buffers.push(buffer); }
// Pop a buffer from the buffer queue and return it
Buffer popBuffer() {
assert(!buffers.empty());
@ -78,8 +95,7 @@ namespace Audio {
DSPSource() { reset(); }
};
class HLE_DSP : public DSPCore {
// The audio frame types are public in case we want to use them for unit tests
class DSPMixer {
public:
template <typename T, usize channelCount = 1>
using Sample = std::array<T, channelCount>;
@ -96,6 +112,43 @@ namespace Audio {
template <typename T>
using QuadFrame = Frame<T, 4>;
private:
using ChannelFormat = HLE::DspConfiguration::OutputFormat;
// The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages
// Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU
static constexpr usize mixerStageCount = 3;
public:
ChannelFormat channelFormat = ChannelFormat::Stereo;
std::array<float, mixerStageCount> volumes;
std::array<bool, 2> enableAuxStages;
void reset() {
channelFormat = ChannelFormat::Stereo;
volumes.fill(0.0);
enableAuxStages.fill(false);
}
};
class HLE_DSP : public DSPCore {
// The audio frame types are public in case we want to use them for unit tests
public:
template <typename T, usize channelCount = 1>
using Sample = DSPMixer::Sample<T, channelCount>;
template <typename T, usize channelCount>
using Frame = DSPMixer::Frame<T, channelCount>;
template <typename T>
using MonoFrame = DSPMixer::MonoFrame<T>;
template <typename T>
using StereoFrame = DSPMixer::StereoFrame<T>;
template <typename T>
using QuadFrame = DSPMixer::QuadFrame<T>;
using Source = Audio::DSPSource;
using SampleBuffer = Source::SampleBuffer;
@ -114,8 +167,8 @@ namespace Audio {
std::array<Source, Audio::HLE::sourceCount> sources; // DSP voices
Audio::HLE::DspMemory dspRam;
SampleFormat sampleFormat = SampleFormat::ADPCM;
SourceType sourceType = SourceType::Stereo;
Audio::DSPMixer mixer;
std::unique_ptr<Audio::AAC::Decoder> aacDecoder;
void resetAudioPipe();
bool loaded = false; // Have we loaded a component?
@ -132,7 +185,7 @@ namespace Audio {
} else if (counter1 == 0xffff && counter0 != 0xfffe) {
return 0;
} else {
return counter0 > counter1 ? 0 : 0;
return (counter0 > counter1) ? 0 : 1;
}
}
@ -157,11 +210,20 @@ namespace Audio {
}
}
void handleAACRequest(const AAC::Message& request);
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
void updateMixerConfig(HLE::SharedMemory& sharedMem);
void generateFrame(StereoFrame<s16>& frame);
void generateFrame(DSPSource& source);
void outputFrame();
// Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame
void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion);
// Decode an entire buffer worth of audio
void decodeBuffer(DSPSource& source);
SampleBuffer decodePCM8(const u8* data, usize sampleCount, Source& source);
SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source);
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
public:
@ -169,7 +231,7 @@ namespace Audio {
~HLE_DSP() override {}
void reset() override;
void runAudioFrame() override;
void runAudioFrame(u64 eventTimestamp) override;
u8* getDspMemory() override { return dspRam.rawMemory.data(); }

View file

@ -27,7 +27,7 @@ namespace Audio {
~NullDSP() override {}
void reset() override;
void runAudioFrame() override;
void runAudioFrame(u64 eventTimestamp) override;
u8* getDspMemory() override { return dspRam.data(); }

View file

@ -83,7 +83,7 @@ namespace Audio {
void reset() override;
// Run 1 slice of DSP instructions and schedule the next audio frame
void runAudioFrame() override {
void runAudioFrame(u64 eventTimestamp) override {
runSlice();
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2);
}

View file

@ -7,16 +7,33 @@
// Remember to initialize every field here to its default value otherwise bad things will happen
struct EmulatorConfig {
// Only enable the shader JIT by default on platforms where it's completely tested
#ifdef PANDA3DS_X64_HOST
#if defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST)
static constexpr bool shaderJitDefault = true;
#else
static constexpr bool shaderJitDefault = false;
#endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
#else
static constexpr bool ubershaderDefault = true;
#endif
static constexpr bool accelerateShadersDefault = true;
bool shaderJitEnabled = shaderJitDefault;
bool useUbershaders = ubershaderDefault;
bool accelerateShaders = accelerateShadersDefault;
bool accurateShaderMul = false;
bool discordRpcEnabled = false;
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
bool forceShadergenForLights = true;
int lightShadergenThreshold = 1;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
bool sdCardInserted = true;
bool sdWriteProtected = false;
@ -25,6 +42,9 @@ struct EmulatorConfig {
bool audioEnabled = false;
bool vsyncEnabled = true;
bool enableRenderdoc = false;
bool printAppVersion = true;
bool chargerPlugged = true;
// Default to 3% battery to make users suffer
int batteryPercentage = 3;
@ -33,7 +53,25 @@ struct EmulatorConfig {
std::filesystem::path defaultRomPath = "";
std::filesystem::path filePath;
// Frontend window settings
struct WindowSettings {
static constexpr int defaultX = 200;
static constexpr int defaultY = 200;
static constexpr int defaultWidth = 800;
static constexpr int defaultHeight = 240 * 2;
bool rememberPosition = false; // Remember window position & size
bool showAppVersion = false;
int x = defaultX;
int y = defaultY;
int width = defaultHeight;
int height = defaultHeight;
};
WindowSettings windowSettings;
EmulatorConfig(const std::filesystem::path& path);
void load();
void save();
};
};

View file

@ -1,20 +1,29 @@
#pragma once
#include <array>
#include <cstring>
#include <cstdint>
#include <climits>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <optional>
#include <vector>
#include "helpers.hpp"
#include "io_file.hpp"
#include "swap.hpp"
namespace Crypto {
constexpr std::size_t AesKeySize = 0x10;
constexpr usize AesKeySize = 0x10;
using AESKey = std::array<u8, AesKeySize>;
template <std::size_t N>
static std::array<u8, N> rolArray(const std::array<u8, N>& value, std::size_t bits) {
struct Seed {
u64_le titleID;
AESKey seed;
std::array<u8, 8> pad;
};
template <usize N>
static std::array<u8, N> rolArray(const std::array<u8, N>& value, usize bits) {
const auto bitWidth = N * CHAR_BIT;
bits %= bitWidth;
@ -24,18 +33,18 @@ namespace Crypto {
std::array<u8, N> result;
for (std::size_t i = 0; i < N; i++) {
for (usize i = 0; i < N; i++) {
result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX;
}
return result;
}
template <std::size_t N>
template <usize N>
static std::array<u8, N> addArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
std::array<u8, N> result;
std::size_t sum = 0;
std::size_t carry = 0;
usize sum = 0;
usize carry = 0;
for (std::int64_t i = N - 1; i >= 0; i--) {
sum = a[i] + b[i] + carry;
@ -46,11 +55,11 @@ namespace Crypto {
return result;
}
template <std::size_t N>
template <usize N>
static std::array<u8, N> xorArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
std::array<u8, N> result;
for (std::size_t i = 0; i < N; i++) {
for (usize i = 0; i < N; i++) {
result[i] = a[i] ^ b[i];
}
@ -63,7 +72,7 @@ namespace Crypto {
}
AESKey rawKey;
for (std::size_t i = 0; i < rawKey.size(); i++) {
for (usize i = 0; i < rawKey.size(); i++) {
rawKey[i] = static_cast<u8>(std::stoi(hex.substr(i * 2, 2), 0, 16));
}
@ -76,7 +85,7 @@ namespace Crypto {
std::optional<AESKey> normalKey = std::nullopt;
};
enum KeySlotId : std::size_t {
enum KeySlotId : usize {
NCCHKey0 = 0x2C,
NCCHKey1 = 0x25,
NCCHKey2 = 0x18,
@ -84,14 +93,17 @@ namespace Crypto {
};
class AESEngine {
private:
constexpr static std::size_t AesKeySlotCount = 0x40;
private:
constexpr static usize AesKeySlotCount = 0x40;
std::optional<AESKey> m_generator = std::nullopt;
std::array<AESKeySlot, AesKeySlotCount> m_slots;
bool keysLoaded = false;
constexpr void updateNormalKey(std::size_t slotId) {
std::vector<Seed> seeds;
IOFile seedDatabase;
constexpr void updateNormalKey(usize slotId) {
if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) {
auto& keySlot = m_slots.at(slotId);
AESKey keyX = keySlot.keyX.value();
@ -101,13 +113,17 @@ namespace Crypto {
}
}
public:
public:
AESEngine() {}
void loadKeys(const std::filesystem::path& path);
void setSeedPath(const std::filesystem::path& path);
// Returns true on success, false on failure
bool loadSeeds();
bool haveKeys() { return keysLoaded; }
bool haveGenerator() { return m_generator.has_value(); }
constexpr bool hasKeyX(std::size_t slotId) {
constexpr bool hasKeyX(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -115,18 +131,16 @@ namespace Crypto {
return m_slots.at(slotId).keyX.has_value();
}
constexpr AESKey getKeyX(std::size_t slotId) {
return m_slots.at(slotId).keyX.value_or(AESKey{});
}
constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); }
constexpr void setKeyX(std::size_t slotId, const AESKey &key) {
constexpr void setKeyX(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).keyX = key;
updateNormalKey(slotId);
}
}
constexpr bool hasKeyY(std::size_t slotId) {
constexpr bool hasKeyY(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -134,18 +148,16 @@ namespace Crypto {
return m_slots.at(slotId).keyY.has_value();
}
constexpr AESKey getKeyY(std::size_t slotId) {
return m_slots.at(slotId).keyY.value_or(AESKey{});
}
constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); }
constexpr void setKeyY(std::size_t slotId, const AESKey &key) {
constexpr void setKeyY(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).keyY = key;
updateNormalKey(slotId);
}
}
constexpr bool hasNormalKey(std::size_t slotId) {
constexpr bool hasNormalKey(usize slotId) {
if (slotId >= AesKeySlotCount) {
return false;
}
@ -153,14 +165,14 @@ namespace Crypto {
return m_slots.at(slotId).normalKey.has_value();
}
constexpr AESKey getNormalKey(std::size_t slotId) {
return m_slots.at(slotId).normalKey.value_or(AESKey{});
}
constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); }
constexpr void setNormalKey(std::size_t slotId, const AESKey &key) {
constexpr void setNormalKey(usize slotId, const AESKey& key) {
if (slotId < AesKeySlotCount) {
m_slots.at(slotId).normalKey = key;
}
}
std::optional<AESKey> getSeedFromDB(u64 titleID);
};
}
} // namespace Crypto

View file

@ -135,4 +135,7 @@ class Emulator {
std::filesystem::path getAppDataRoot();
std::span<u8> getSMDH();
private:
void loadRenderdoc();
};

View file

@ -1,7 +1,7 @@
#pragma once
#include "helpers.hpp"
using Handle = u32;
using HorizonHandle = u32;
namespace KernelHandles {
enum : u32 {
@ -61,17 +61,17 @@ namespace KernelHandles {
};
// Returns whether "handle" belongs to one of the OS services
static constexpr bool isServiceHandle(Handle handle) {
static constexpr bool isServiceHandle(HorizonHandle handle) {
return handle >= MinServiceHandle && handle <= MaxServiceHandle;
}
// Returns whether "handle" belongs to one of the OS services' shared memory areas
static constexpr bool isSharedMemHandle(Handle handle) {
static constexpr bool isSharedMemHandle(HorizonHandle handle) {
return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle;
}
// Returns the name of a handle as a string based on the given handle
static const char* getServiceName(Handle handle) {
static const char* getServiceName(HorizonHandle handle) {
switch (handle) {
case AC: return "AC";
case ACT: return "ACT";

View file

@ -16,8 +16,11 @@
#include "services/service_manager.hpp"
class CPU;
struct Scheduler;
class Kernel {
using Handle = HorizonHandle;
std::span<u32, 16> regs;
CPU& cpu;
Memory& mem;
@ -247,6 +250,7 @@ public:
ServiceManager& getServiceManager() { return serviceManager; }
KFcram& getFcramManager() { return fcramManager; }
Scheduler& getScheduler();
void sendGPUInterrupt(GPUInterrupt type) { serviceManager.sendGPUInterrupt(type); }
void clearInstructionCache();

View file

@ -53,7 +53,7 @@ enum class FcramRegion {
struct AddressArbiter {};
struct ResourceLimits {
Handle handle;
HorizonHandle handle;
s32 currentCommit = 0;
};
@ -97,6 +97,8 @@ struct Port {
};
struct Session {
using Handle = HorizonHandle;
Handle portHandle; // The port this session is subscribed to
Session(Handle portHandle) : portHandle(portHandle) {}
};
@ -115,6 +117,8 @@ enum class ThreadStatus {
};
struct Thread {
using Handle = HorizonHandle;
u32 initialSP; // Initial r13 value
u32 entrypoint; // Initial r15 value
u32 priority;
@ -167,6 +171,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) {
}
struct Mutex {
using Handle = HorizonHandle;
u64 waitlist; // Refer to the getWaitlist function below for documentation
Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked
Handle handle; // Handle of the mutex itself
@ -209,6 +215,8 @@ struct MemoryBlock {
// Generic kernel object class
struct KernelObject {
using Handle = HorizonHandle;
Handle handle = 0; // A u32 the OS will use to identify objects
void* data = nullptr;
KernelObjectType type;

View file

@ -50,6 +50,7 @@ struct NCCH {
static constexpr u64 mediaUnit = 0x200;
u64 size = 0; // Size of NCCH converted to bytes
u64 saveDataSize = 0;
u32 stackSize = 0;
u32 bssSize = 0;
u32 exheaderSize = 0;
@ -60,10 +61,10 @@ struct NCCH {
CodeSetInfo text, data, rodata;
FSInfo partitionInfo;
std::optional<Crypto::AESKey> primaryKey, secondaryKey;
// Contents of the .code file in the ExeFS
std::vector<u8> codeFile;
// Contains of the cart's save data
std::vector<u8> saveData;
// The cart region. Only the CXI's region matters to us. Necessary to get past region locking
std::optional<Regions> region = std::nullopt;
std::vector<u8> smdh;
@ -76,7 +77,7 @@ struct NCCH {
bool hasExeFS() { return exeFS.size != 0; }
bool hasRomFS() { return romFS.size != 0; }
bool hasCode() { return codeFile.size() != 0; }
bool hasSaveData() { return saveData.size() != 0; }
bool hasSaveData() { return saveDataSize != 0; }
// Parse SMDH for region info and such. Returns false on failure, true on success
bool parseSMDH(const std::vector<u8> &smdh);

View file

@ -114,6 +114,7 @@ class Memory {
bool changeState = false;
bool changePerms = false;
};
using Handle = HorizonHandle;
u8* fcram;
u8* dspRam; // Provided to us by Audio
@ -222,8 +223,14 @@ private:
u8* getFCRAM() { return fcram; }
enum class BatteryLevel {
Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars
Empty = 0,
AlmostEmpty,
OneBar,
TwoBars,
ThreeBars,
FourBars,
};
u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) {
u8 value = static_cast<u8>(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5
if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected
@ -287,5 +294,5 @@ private:
bool allocateMainThreadStack(u32 size);
Regions getConsoleRegion();
void copySharedFont(u8* ptr);
void copySharedFont(u8* ptr, u32 vaddr);
};

View file

@ -1,6 +1,13 @@
#pragma once
#include <QAction>
#include <QCheckBox>
#include <QDialog>
#include <QLabel>
#include <QLineEdit>
#include <QListWidget>
#include <QPushButton>
#include <QTextEdit>
#include <QWidget>
#include <filesystem>
#include <memory>
@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget {
std::filesystem::path cheatPath;
Emulator* emu;
};
struct CheatMetadata {
u32 handle = Cheats::badCheatHandle;
std::string name = "New cheat";
std::string code;
bool enabled = true;
};
class CheatEntryWidget : public QWidget {
Q_OBJECT
public:
CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent);
void Update() {
name->setText(metadata.name.c_str());
enabled->setChecked(metadata.enabled);
update();
}
void Remove() {
emu->getCheats().removeCheat(metadata.handle);
cheatList->takeItem(cheatList->row(listItem));
deleteLater();
}
const CheatMetadata& getMetadata() { return metadata; }
void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; }
private:
void checkboxChanged(int state);
void editClicked();
Emulator* emu;
CheatMetadata metadata;
u32 handle;
QLabel* name;
QCheckBox* enabled;
QListWidget* cheatList;
QListWidgetItem* listItem;
};
class CheatEditDialog : public QDialog {
Q_OBJECT
public:
CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry);
void accepted();
void rejected();
private:
Emulator* emu;
CheatEntryWidget& cheatEntry;
QTextEdit* codeEdit;
QLineEdit* nameEdit;
};

View file

@ -0,0 +1,21 @@
#pragma once
#include <QFontMetrics>
#include <QLabel>
#include <QString>
#include <QWidget>
class ElidedLabel : public QLabel {
Q_OBJECT
public:
explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
void setText(QString text);
protected:
void resizeEvent(QResizeEvent* event);
private:
void updateText();
QString m_text;
Qt::TextElideMode m_elideMode;
};

View file

@ -17,7 +17,9 @@
#include "panda_qt/about_window.hpp"
#include "panda_qt/cheats_window.hpp"
#include "panda_qt/config_window.hpp"
#include "panda_qt/patch_window.hpp"
#include "panda_qt/screen.hpp"
#include "panda_qt/shader_editor.hpp"
#include "panda_qt/text_editor.hpp"
#include "services/hid.hpp"
@ -47,6 +49,8 @@ class MainWindow : public QMainWindow {
EditCheat,
PressTouchscreen,
ReleaseTouchscreen,
ReloadUbershader,
SetScreenSize,
};
// Tagged union representing our message queue messages
@ -78,6 +82,11 @@ class MainWindow : public QMainWindow {
u16 x;
u16 y;
} touchscreen;
struct {
u32 width;
u32 height;
} screenSize;
};
};
@ -90,13 +99,15 @@ class MainWindow : public QMainWindow {
std::mutex messageQueueMutex;
std::vector<EmulatorMessage> messageQueue;
QMenuBar* menuBar = nullptr;
InputMappings keyboardMappings;
ScreenWidget screen;
ScreenWidget* screen;
AboutWindow* aboutWindow;
ConfigWindow* configWindow;
CheatsWindow* cheatsEditor;
TextEditorWindow* luaEditor;
QMenuBar* menuBar = nullptr;
PatchWindow* patchWindow;
ShaderEditorWindow* shaderEditor;
// We use SDL's game controller API since it's the sanest API that supports as many controllers as possible
SDL_GameController* gameController = nullptr;
@ -108,17 +119,17 @@ class MainWindow : public QMainWindow {
void selectROM();
void dumpDspFirmware();
void dumpRomFS();
void openLuaEditor();
void openCheatsEditor();
void showAboutMenu();
void initControllers();
void pollControllers();
void setupControllerSensors(SDL_GameController* controller);
void sendMessage(const EmulatorMessage& message);
void dispatchMessage(const EmulatorMessage& message);
// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
bool usingGL = false;
bool usingVk = false;
bool usingMtl = false;
// Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard
// This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state
@ -130,11 +141,15 @@ class MainWindow : public QMainWindow {
MainWindow(QApplication* app, QWidget* parent = nullptr);
~MainWindow();
void closeEvent(QCloseEvent* event) override;
void keyPressEvent(QKeyEvent* event) override;
void keyReleaseEvent(QKeyEvent* event) override;
void mousePressEvent(QMouseEvent* event) override;
void mouseReleaseEvent(QMouseEvent* event) override;
void loadLuaScript(const std::string& code);
void reloadShader(const std::string& shader);
void editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback);
void handleScreenResize(u32 width, u32 height);
};

View file

@ -0,0 +1,31 @@
#pragma once
#include <QLabel>
#include <QMessageBox>
#include <QWidget>
#include <filesystem>
#include "panda_qt/elided_label.hpp"
class PatchWindow final : public QWidget {
Q_OBJECT
public:
PatchWindow(QWidget* parent = nullptr);
~PatchWindow() = default;
private:
// Show a message box
// Title: Title of the message box to display
// Message: Message to display
// Icon: The type of icon (error, warning, information, etc) to display
// IconPath: If non-null, then a path to an icon in our assets to display on the OK button
void displayMessage(
const QString& title, const QString& message, QMessageBox::Icon icon = QMessageBox::Icon::Warning, const char* iconPath = nullptr
);
std::filesystem::path inputPath = "";
std::filesystem::path patchPath = "";
ElidedLabel* inputPathLabel = nullptr;
ElidedLabel* patchPathLabel = nullptr;
};

View file

@ -1,5 +1,6 @@
#pragma once
#include <QWidget>
#include <functional>
#include <memory>
#include "gl/context.h"
@ -10,15 +11,28 @@ class ScreenWidget : public QWidget {
Q_OBJECT
public:
ScreenWidget(QWidget* parent = nullptr);
using ResizeCallback = std::function<void(u32, u32)>;
ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr);
void resizeEvent(QResizeEvent* event) override;
// Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context
void resizeSurface(u32 width, u32 height);
GL::Context* getGLContext() { return glContext.get(); }
// Dimensions of our output surface
u32 surfaceWidth = 0;
u32 surfaceHeight = 0;
WindowInfo windowInfo;
// Cached "previous" dimensions, used when resizing our window
u32 previousWidth = 0;
u32 previousHeight = 0;
private:
std::unique_ptr<GL::Context> glContext = nullptr;
ResizeCallback resizeCallback;
bool createGLContext();
qreal devicePixelRatioFromScreen() const;

View file

@ -0,0 +1,27 @@
#pragma once
#include <QApplication>
#include <QDialog>
#include <QWidget>
#include <string>
#include "zep.h"
#include "zep/mode_repl.h"
#include "zep/regress.h"
class ShaderEditorWindow : public QDialog {
Q_OBJECT
private:
Zep::ZepWidget_Qt zepWidget;
Zep::IZepReplProvider replProvider;
static constexpr float fontSize = 14.0f;
public:
// Whether this backend supports shader editor
bool supported = true;
ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText);
void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); }
void setEnable(bool enable);
};

View file

@ -23,6 +23,8 @@ class FrontendSDL {
SDL_GameController* gameController = nullptr;
InputMappings keyboardMappings;
u32 windowWidth = 400;
u32 windowHeight = 480;
int gameControllerID;
bool programRunning = true;
@ -35,4 +37,6 @@ class FrontendSDL {
// And so the user can still use the keyboard to control the analog
bool keyboardAnalogX = false;
bool keyboardAnalogY = false;
void setupControllerSensors(SDL_GameController* controller);
};

69
include/renderdoc.hpp Normal file
View file

@ -0,0 +1,69 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <string>
#include "helpers.hpp"
#ifdef PANDA3DS_ENABLE_RENDERDOC
namespace Renderdoc {
// Loads renderdoc dynamic library module.
void loadRenderdoc();
// Begins a capture if a renderdoc instance is attached.
void startCapture();
// Ends current renderdoc capture.
void endCapture();
// Triggers capturing process.
void triggerCapture();
// Sets output directory for captures
void setOutputDir(const std::string& path, const std::string& prefix);
// Returns whether we've compiled with Renderdoc support
static constexpr bool isSupported() { return true; }
} // namespace Renderdoc
#else
namespace Renderdoc {
static void loadRenderdoc() {}
static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled") }
static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled") }
static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled") }
static void setOutputDir(const std::string& path, const std::string& prefix) {}
static constexpr bool isSupported() { return false; }
} // namespace Renderdoc
#endif
namespace Renderdoc {
// RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture
struct Scope {
Scope() { Renderdoc::startCapture(); }
~Scope() { Renderdoc::endCapture(); }
Scope(const Scope&) = delete;
Scope& operator=(const Scope&) = delete;
Scope(Scope&&) = delete;
Scope& operator=(const Scope&&) = delete;
};
// RAII scope class that encloses a Renderdoc capture. Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture
// trigger on its own and take a capture
struct InstantScope {
InstantScope() {
Renderdoc::triggerCapture();
Renderdoc::startCapture();
}
~InstantScope() { Renderdoc::endCapture(); }
InstantScope(const InstantScope&) = delete;
InstantScope& operator=(const InstantScope&) = delete;
InstantScope(InstantScope&&) = delete;
InstantScope& operator=(const InstantScope&&) = delete;
};
} // namespace Renderdoc

View file

@ -1,8 +1,10 @@
#pragma once
#include <array>
#include <span>
#include <optional>
#include <span>
#include <string>
#include "PICA/draw_acceleration.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "helpers.hpp"
@ -16,12 +18,16 @@ enum class RendererType : s8 {
Null = 0,
OpenGL = 1,
Vulkan = 2,
Software = 3,
Metal = 3,
Software = 4,
};
class GPU;
struct EmulatorConfig;
struct SDL_Window;
class GPU;
class ShaderUnit;
class Renderer {
protected:
GPU& gpu;
@ -45,6 +51,8 @@ class Renderer {
u32 outputWindowWidth = 400;
u32 outputWindowHeight = 240 * 2;
EmulatorConfig* emulatorConfig = nullptr;
public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
virtual ~Renderer();
@ -66,6 +74,19 @@ class Renderer {
// This function does things like write back or cache necessary state before we delete our context
virtual void deinitGraphicsContext() = 0;
// Functions for hooking up the renderer core to the frontend's shader editor for editing ubershaders in real time
// SupportsShaderReload: Indicates whether the backend offers ubershader reload support or not
// GetUbershader/SetUbershader: Gets or sets the renderer's current ubershader
virtual bool supportsShaderReload() { return false; }
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
// This function is called on every draw call before parsing vertex data.
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
// ubershaders and shadergen, and so on.
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; }
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
@ -91,4 +112,6 @@ class Renderer {
outputWindowWidth = width;
outputWindowHeight = height;
}
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
};

View file

@ -0,0 +1,12 @@
#pragma once
// Information about our OpenGL/OpenGL ES driver that we should keep track of
// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information
namespace OpenGL {
struct Driver {
bool supportsExtFbFetch = false;
bool supportsArmFbFetch = false;
bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; }
};
} // namespace OpenGL

View file

@ -38,11 +38,14 @@ struct GLStateManager {
GLuint stencilMask;
GLuint boundVAO;
GLuint boundVBO;
GLuint currentProgram;
GLuint boundUBO;
GLenum depthFunc;
GLenum logicOp;
GLenum blendEquationRGB, blendEquationAlpha;
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
GLenum blendFuncDestRGB, blendFuncDestAlpha;
void reset();
void resetBlend();
@ -51,7 +54,7 @@ struct GLStateManager {
void resetColourMask();
void resetDepth();
void resetVAO();
void resetVBO();
void resetBuffers();
void resetProgram();
void resetScissor();
void resetStencil();
@ -169,13 +172,6 @@ struct GLStateManager {
}
}
void bindVBO(GLuint handle) {
if (boundVBO != handle) {
boundVBO = handle;
glBindBuffer(GL_ARRAY_BUFFER, handle);
}
}
void useProgram(GLuint handle) {
if (currentProgram != handle) {
currentProgram = handle;
@ -183,8 +179,14 @@ struct GLStateManager {
}
}
void bindUBO(GLuint handle) {
if (boundUBO != handle) {
boundUBO = handle;
glBindBuffer(GL_UNIFORM_BUFFER, boundUBO);
}
}
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
void setColourMask(bool r, bool g, bool b, bool a) {
@ -224,6 +226,41 @@ struct GLStateManager {
}
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
// Counterpart to glBlendEquationSeparate
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) {
blendEquationRGB = modeRGB;
blendEquationAlpha = modeAlpha;
glBlendEquationSeparate(modeRGB, modeAlpha);
}
}
// Counterpart to glBlendFuncSeparate
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha ||
blendFuncDestAlpha != destAlpha) {
blendFuncSourceRGB = sourceRGB;
blendFuncDestRGB = destRGB;
blendFuncSourceAlpha = sourceAlpha;
blendFuncDestAlpha = destAlpha;
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha);
}
}
// Counterpart to regular glBlendEquation
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
setBlendEquation(static_cast<GLenum>(modeRGB), static_cast<GLenum>(modeAlpha));
}
void setBlendEquation(OpenGL::BlendEquation mode) {
setBlendEquation(static_cast<GLenum>(mode));
}
};
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");

View file

@ -1,11 +1,23 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <memory>
#include <optional>
#include <span>
#include <unordered_map>
#include <utility>
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vert_config.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "gl/stream_buffer.h"
#include "gl_driver.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -22,27 +34,48 @@ class RendererGL final : public Renderer {
OpenGL::Program triangleProgram;
OpenGL::Program displayProgram;
OpenGL::VertexArray vao;
// VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes
OpenGL::VertexArray defaultVAO;
// VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing.
OpenGL::VertexArray hwShaderVAO;
OpenGL::VertexBuffer vbo;
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
bool oldDepthmapEnable = false;
// Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
bool usingAcceleratedShader = false;
bool performIndexedRender = false;
bool usingShortIndices = false;
// Set by prepareForDraw, metadata for indexed renders
GLuint minimumIndex = 0;
GLuint maximumIndex = 0;
void* hwIndexBufferOffset = nullptr;
// When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw
u32 previousAttributeMask = 0;
// Cached pointer to the current vertex shader when using HW accelerated shaders
OpenGL::Shader* generatedVertexShader = nullptr;
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
@ -53,25 +86,81 @@ class RendererGL final : public Renderer {
OpenGL::VertexBuffer dummyVBO;
OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray;
OpenGL::Texture LUTTexture;
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
// We can compile this once and then link it with all other generated fragment shaders
OpenGL::Shader defaultShadergenVs;
GLuint shadergenFragmentUBO;
// UBO for uploading the PICA uniforms when using hw shaders
GLuint hwShaderUniformUBO;
using StreamBuffer = OpenGLStreamBuffer;
std::unique_ptr<StreamBuffer> hwVertexBuffer;
std::unique_ptr<StreamBuffer> hwIndexBuffer;
// Cache of fixed attribute values so that we don't do any duplicate updates
std::array<std::array<float, 4>, 16> fixedAttrValues;
// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
};
struct ShaderCache {
std::unordered_map<PICA::VertConfig, std::optional<OpenGL::Shader>> vertexShaderCache;
std::unordered_map<PICA::FragmentConfig, OpenGL::Shader> fragmentShaderCache;
// Program cache indexed by GLuints for the vertex and fragment shader to use
// Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint
std::unordered_map<u64, CachedProgram> programCache;
void clear() {
for (auto& it : programCache) {
CachedProgram& cachedProgram = it.second;
cachedProgram.program.free();
}
for (auto& it : vertexShaderCache) {
if (it.second.has_value()) {
it.second->free();
}
}
for (auto& it : fragmentShaderCache) {
it.second.free();
}
programCache.clear();
vertexShaderCache.clear();
fragmentShaderCache.clear();
}
};
ShaderCache shaderCache;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen;
OpenGL::Driver driverInfo;
MAKE_LOG_FUNCTION(log, rendererLogger)
void setupBlending();
void setupStencilTest(bool stencilEnable);
void bindDepthBuffer();
void setupTextureEnvState();
void setupUbershaderTexEnv();
void bindTexturesToSlots();
void updateLightingLUT();
void updateFogLUT();
void initGraphicsContextInternal();
void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel);
public:
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
~RendererGL() override;
void reset() override;
@ -82,12 +171,18 @@ class RendererGL final : public Renderer {
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void deinitGraphicsContext() override;
virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override;
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
// Note: The caller is responsible for deleting the currently bound FBO before calling this
void setFBO(uint handle) { screenFramebuffer.m_handle = handle; }
void resetStateManager() { gl.reset(); }
void initUbershader(OpenGL::Program& program);
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
@ -95,4 +190,4 @@ class RendererGL final : public Renderer {
// Take a screenshot of the screen and store it in a file
void screenshot(const std::string& name) override;
};
};

View file

@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
class SurfaceCache {
// Vanilla std::optional can't hold actual references
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
size_t size;
size_t evictionIndex;

View file

@ -0,0 +1,74 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct BlitPipelineHash {
// Formats
ColorFmt colorFmt;
DepthFmt depthFmt;
};
// This pipeline only caches the pipeline with all of its color and depth attachment variations
class BlitPipelineCache {
public:
BlitPipelineCache() = default;
~BlitPipelineCache() {
reset();
vertexFunction->release();
fragmentFunction->release();
}
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
device = dev;
vertexFunction = vert;
fragmentFunction = frag;
}
MTL::RenderPipelineState* get(BlitPipelineHash hash) {
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
auto& pipeline = pipelineCache[intHash];
if (!pipeline) {
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
auto colorAttachment = desc->colorAttachments()->object(0);
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
NS::Error* error = nullptr;
desc->setLabel(toNSString("Blit pipeline"));
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
desc->release();
}
return pipeline;
}
void reset() {
for (auto& pair : pipelineCache) {
pair.second->release();
}
pipelineCache.clear();
}
private:
std::map<u8, MTL::RenderPipelineState*> pipelineCache;
MTL::Device* device;
MTL::Function* vertexFunction;
MTL::Function* fragmentFunction;
};
} // namespace Metal

View file

@ -0,0 +1,56 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
struct RenderState {
MTL::RenderPipelineState* renderPipelineState = nullptr;
MTL::DepthStencilState* depthStencilState = nullptr;
MTL::Texture* textures[3] = {nullptr};
MTL::SamplerState* samplerStates[3] = {nullptr};
};
class CommandEncoder {
public:
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
renderCommandEncoder = rce;
// Reset the render state
renderState = RenderState{};
}
// Resource binding
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
if (renderPipelineState != renderState.renderPipelineState) {
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
renderState.renderPipelineState = renderPipelineState;
}
}
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
if (depthStencilState != renderState.depthStencilState) {
renderCommandEncoder->setDepthStencilState(depthStencilState);
renderState.depthStencilState = depthStencilState;
}
}
void setFragmentTexture(MTL::Texture* texture, u32 index) {
if (texture != renderState.textures[index]) {
renderCommandEncoder->setFragmentTexture(texture, index);
renderState.textures[index] = texture;
}
}
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
if (samplerState != renderState.samplerStates[index]) {
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
renderState.samplerStates[index] = samplerState;
}
}
private:
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
RenderState renderState;
};
} // namespace Metal

View file

@ -0,0 +1,6 @@
#pragma once
#include <Metal/Metal.hpp>
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding)
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding)

View file

@ -0,0 +1,80 @@
#pragma once
#include <map>
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct DepthStencilHash {
u32 stencilConfig;
u16 stencilOpConfig;
bool depthStencilWrite;
u8 depthFunc;
};
class DepthStencilCache {
public:
DepthStencilCache() = default;
~DepthStencilCache() { reset(); }
void set(MTL::Device* dev) { device = dev; }
MTL::DepthStencilState* get(DepthStencilHash hash) {
u64 intHash =
((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig;
auto& depthStencilState = depthStencilCache[intHash];
if (!depthStencilState) {
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
desc->setDepthWriteEnabled(hash.depthStencilWrite);
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig);
MTL::StencilDescriptor* stencilDesc = nullptr;
if (stencilEnable) {
const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;
const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig);
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig);
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig);
stencilDesc = MTL::StencilDescriptor::alloc()->init();
stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp));
stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp));
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
stencilDesc->setReadMask(stencilRefMask);
stencilDesc->setWriteMask(stencilBufferMask);
desc->setFrontFaceStencil(stencilDesc);
desc->setBackFaceStencil(stencilDesc);
}
depthStencilState = device->newDepthStencilState(desc);
desc->release();
if (stencilDesc) {
stencilDesc->release();
}
}
return depthStencilState;
}
void reset() {
for (auto& pair : depthStencilCache) {
pair.second->release();
}
depthStencilCache.clear();
}
private:
std::map<u64, MTL::DepthStencilState*> depthStencilCache;
MTL::Device* device;
};
} // namespace Metal

View file

@ -0,0 +1,162 @@
#pragma once
#include <map>
#include "objc_helper.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct DrawFragmentFunctionHash {
u32 lightingConfig1; // 32 bits (TODO: check this)
bool lightingEnabled; // 1 bit
u8 lightingNumLights; // 3 bits
// | ref | func | on |
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
};
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
if (!l.lightingEnabled && r.lightingEnabled) return true;
if (l.lightingNumLights < r.lightingNumLights) return true;
if (l.lightingConfig1 < r.lightingConfig1) return true;
if (l.alphaControl < r.alphaControl) return true;
return false;
}
struct DrawPipelineHash { // 56 bits
// Formats
ColorFmt colorFmt; // 3 bits
DepthFmt depthFmt; // 3 bits
// Blending
bool blendEnabled; // 1 bit
// | functions | aeq | ceq |
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
u8 colorWriteMask; // 4 bits
DrawFragmentFunctionHash fragHash;
};
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
if ((u32)l.colorFmt < (u32)r.colorFmt) return true;
if ((u32)l.depthFmt < (u32)r.depthFmt) return true;
if (!l.blendEnabled && r.blendEnabled) return true;
if (l.blendControl < r.blendControl) return true;
if (l.colorWriteMask < r.colorWriteMask) return true;
if (l.fragHash < r.fragHash) return true;
return false;
}
// This pipeline only caches the pipeline with all of its color and depth attachment variations
class DrawPipelineCache {
public:
DrawPipelineCache() = default;
~DrawPipelineCache() {
reset();
vertexDescriptor->release();
vertexFunction->release();
}
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
device = dev;
library = lib;
vertexFunction = vert;
vertexDescriptor = vertDesc;
}
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
auto& pipeline = pipelineCache[hash];
if (!pipeline) {
auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
if (!fragmentFunction) {
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
NS::Error* error = nullptr;
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
if (error) {
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
constants->release();
}
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
desc->setVertexFunction(vertexFunction);
desc->setFragmentFunction(fragmentFunction);
desc->setVertexDescriptor(vertexDescriptor);
auto colorAttachment = desc->colorAttachments()->object(0);
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
MTL::ColorWriteMask writeMask = 0;
if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
colorAttachment->setWriteMask(writeMask);
if (hash.blendEnabled) {
const u8 rgbEquation = hash.blendControl & 0x7;
const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
// Get blending functions
const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);
colorAttachment->setBlendingEnabled(true);
colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
}
MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
desc->setDepthAttachmentPixelFormat(depthFormat);
if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);
NS::Error* error = nullptr;
desc->setLabel(toNSString("Draw pipeline"));
pipeline = device->newRenderPipelineState(desc, &error);
if (error) {
Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
}
desc->release();
}
return pipeline;
}
void reset() {
for (auto& pair : pipelineCache) {
pair.second->release();
}
pipelineCache.clear();
for (auto& pair : fragmentFunctionCache) {
pair.second->release();
}
fragmentFunctionCache.clear();
}
private:
std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
MTL::Device* device;
MTL::Library* library;
MTL::Function* vertexFunction;
MTL::VertexDescriptor* vertexDescriptor;
};
} // namespace Metal

View file

@ -0,0 +1,20 @@
#pragma once
#include <Metal/Metal.hpp>
namespace Metal {
class LutTexture {
public:
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
~LutTexture();
u32 getNextIndex();
MTL::Texture* getTexture() { return texture; }
u32 getCurrentIndex() { return currentIndex; }
private:
MTL::Texture* texture;
u32 currentIndex = 0;
};
} // namespace Metal

View file

@ -0,0 +1,91 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "objc_helper.hpp"
#include "opengl.hpp"
#include "pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
template <typename Format_t>
struct RenderTarget {
MTL::Device* device;
u32 location;
Format_t format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
MTL::Texture* texture = nullptr;
RenderTarget() : valid(false) {}
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
const u32 x0 = (startOffset % (size.x() * 8)) / 8;
const u32 y0 = (startOffset / (size.x() * 8)) * 8;
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(RenderTarget& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate() {
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
if (std::is_same<Format_t, PICA::ColorFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
} else {
panic("Invalid format type");
}
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
descriptor->setTextureType(MTL::TextureType2D);
descriptor->setPixelFormat(pixelFormat);
descriptor->setWidth(size.u());
descriptor->setHeight(size.v());
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
descriptor->setStorageMode(MTL::StorageModePrivate);
texture = device->newTexture(descriptor);
texture->setLabel(toNSString(
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
std::to_string(size.v())
));
descriptor->release();
}
void free() {
valid = false;
if (texture) {
texture->release();
}
}
u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
};
using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
} // namespace Metal

View file

@ -0,0 +1,73 @@
#pragma once
#include <Metal/Metal.hpp>
#include <array>
#include <string>
#include "PICA/regs.hpp"
#include "boost/icl/interval.hpp"
#include "helpers.hpp"
#include "math_util.hpp"
#include "opengl.hpp"
#include "renderer_mtl/pica_to_mtl.hpp"
template <typename T>
using Interval = boost::icl::right_open_interval<T>;
namespace Metal {
struct Texture {
MTL::Device* device;
u32 location;
u32 config; // Magnification/minification filter, wrapping configs, etc
PICA::TextureFmt format;
OpenGL::uvec2 size;
bool valid;
// Range of VRAM taken up by buffer
Interval<u32> range;
PICA::PixelFormatInfo formatInfo;
MTL::Texture* texture = nullptr;
MTL::SamplerState* sampler = nullptr;
Texture() : valid(false) {}
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
u64 endLoc = (u64)loc + sizeInBytes();
// Check if start and end are valid here
range = Interval<u32>(loc, (u32)endLoc);
}
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
bool matches(Texture& other) {
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
}
void allocate();
void setNewConfig(u32 newConfig);
void decodeTexture(std::span<const u8> data);
void free();
u64 sizeInBytes();
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
// Get the morton interleave offset of a texel based on its U and V values
static u32 mortonInterleave(u32 u, u32 v);
// Get the byte offset of texel (u, v) in the texture
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
// Returns the format of this texture as a string
std::string_view formatToString() { return PICA::textureFormatToString(format); }
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
// TODO: Make hasAlpha a template parameter
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
};
} // namespace Metal

View file

@ -0,0 +1,83 @@
#pragma once
#include <cstring>
#include "helpers.hpp"
#include "pica_to_mtl.hpp"
using namespace PICA;
namespace Metal {
struct BufferHandle {
MTL::Buffer* buffer;
usize offset;
};
class VertexBufferCache {
// 128MB buffer for caching vertex data
static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;
public:
VertexBufferCache() = default;
~VertexBufferCache() {
endFrame();
buffer->release();
}
void set(MTL::Device* dev) {
device = dev;
create();
}
void endFrame() {
ptr = 0;
for (auto buffer : additionalAllocations) {
buffer->release();
}
additionalAllocations.clear();
}
BufferHandle get(const void* data, usize size) {
// If the vertex buffer is too large, just create a new one
if (ptr + size > CACHE_BUFFER_SIZE) {
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
newBuffer->setLabel(toNSString("Additional vertex buffer"));
additionalAllocations.push_back(newBuffer);
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
return BufferHandle{newBuffer, 0};
}
// Copy the data into the buffer
std::memcpy((char*)buffer->contents() + ptr, data, size);
auto oldPtr = ptr;
ptr += size;
return BufferHandle{buffer, oldPtr};
}
void reset() {
endFrame();
if (buffer) {
buffer->release();
create();
}
}
private:
MTL::Buffer* buffer = nullptr;
usize ptr = 0;
std::vector<MTL::Buffer*> additionalAllocations;
MTL::Device* device;
void create() {
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
buffer->setLabel(toNSString("Shared vertex buffer"));
}
};
} // namespace Metal

View file

@ -0,0 +1,12 @@
#pragma once
#include <string>
#include "mtl_common.hpp"
namespace Metal {
dispatch_data_t createDispatchData(const void* data, size_t size);
} // namespace Metal
// Cast from std::string to NS::String*
inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); }

View file

@ -0,0 +1,152 @@
#pragma once
#include <Metal/Metal.hpp>
#include "PICA/regs.hpp"
namespace PICA {
struct PixelFormatInfo {
MTL::PixelFormat pixelFormat;
size_t bytesPerTexel;
};
constexpr PixelFormatInfo pixelFormatInfos[14] = {
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
{MTL::PixelFormatRG8Unorm, 2}, // RG8
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
{MTL::PixelFormatA8Unorm, 1}, // A8
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
{MTL::PixelFormatABGR4Unorm, 2}, // I4
{MTL::PixelFormatA8Unorm, 1}, // A4
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
};
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
switch (format) {
case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
}
}
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
switch (format) {
case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
case DepthFmt::Depth24:
return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats
// Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead
case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
}
}
inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
switch (func) {
case 0: return MTL::CompareFunctionNever;
case 1: return MTL::CompareFunctionAlways;
case 2: return MTL::CompareFunctionEqual;
case 3: return MTL::CompareFunctionNotEqual;
case 4: return MTL::CompareFunctionLess;
case 5: return MTL::CompareFunctionLessEqual;
case 6: return MTL::CompareFunctionGreater;
case 7: return MTL::CompareFunctionGreaterEqual;
default: Helpers::panic("Unknown compare function %u", func);
}
return MTL::CompareFunctionAlways;
}
inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
switch (op) {
case 0: return MTL::BlendOperationAdd;
case 1: return MTL::BlendOperationSubtract;
case 2: return MTL::BlendOperationReverseSubtract;
case 3: return MTL::BlendOperationMin;
case 4: return MTL::BlendOperationMax;
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
default: Helpers::panic("Unknown blend operation %u", op);
}
return MTL::BlendOperationAdd;
}
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
switch (factor) {
case 0: return MTL::BlendFactorZero;
case 1: return MTL::BlendFactorOne;
case 2: return MTL::BlendFactorSourceColor;
case 3: return MTL::BlendFactorOneMinusSourceColor;
case 4: return MTL::BlendFactorDestinationColor;
case 5: return MTL::BlendFactorOneMinusDestinationColor;
case 6: return MTL::BlendFactorSourceAlpha;
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
case 8: return MTL::BlendFactorDestinationAlpha;
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
case 10: return MTL::BlendFactorBlendColor;
case 11: return MTL::BlendFactorOneMinusBlendColor;
case 12: return MTL::BlendFactorBlendAlpha;
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
case 14: return MTL::BlendFactorSourceAlphaSaturated;
case 15: return MTL::BlendFactorOne; // Undocumented
default: Helpers::panic("Unknown blend factor %u", factor);
}
return MTL::BlendFactorOne;
}
inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
switch (op) {
case 0: return MTL::StencilOperationKeep;
case 1: return MTL::StencilOperationZero;
case 2: return MTL::StencilOperationReplace;
case 3: return MTL::StencilOperationIncrementClamp;
case 4: return MTL::StencilOperationDecrementClamp;
case 5: return MTL::StencilOperationInvert;
case 6: return MTL::StencilOperationIncrementWrap;
case 7: return MTL::StencilOperationDecrementWrap;
default: Helpers::panic("Unknown stencil operation %u", op);
}
return MTL::StencilOperationKeep;
}
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
switch (primType) {
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
case PrimType::TriangleFan:
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
return MTL::PrimitiveTypeTriangle;
case PrimType::GeometryPrimitive:
return MTL::PrimitiveTypeTriangle;
}
}
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
switch (addrMode) {
case 0: return MTL::SamplerAddressModeClampToEdge;
case 1: return MTL::SamplerAddressModeClampToBorderColor;
case 2: return MTL::SamplerAddressModeRepeat;
case 3: return MTL::SamplerAddressModeMirrorRepeat;
case 4: return MTL::SamplerAddressModeClampToEdge;
case 5: return MTL::SamplerAddressModeClampToBorderColor;
case 6: return MTL::SamplerAddressModeRepeat;
case 7: return MTL::SamplerAddressModeRepeat;
default: Helpers::panic("Unknown sampler address mode %u", addrMode);
}
return MTL::SamplerAddressModeClampToEdge;
}
} // namespace PICA

View file

@ -0,0 +1,207 @@
#pragma once
#include <Metal/Metal.hpp>
#include <QuartzCore/QuartzCore.hpp>
#include "mtl_blit_pipeline_cache.hpp"
#include "mtl_command_encoder.hpp"
#include "mtl_depth_stencil_cache.hpp"
#include "mtl_draw_pipeline_cache.hpp"
#include "mtl_lut_texture.hpp"
#include "mtl_render_target.hpp"
#include "mtl_texture.hpp"
#include "mtl_vertex_buffer_cache.hpp"
#include "renderer.hpp"
// HACK: use the OpenGL cache
#include "../renderer_gl/surface_cache.hpp"
class GPU;
struct Color4 {
float r, g, b, a;
};
class RendererMTL final : public Renderer {
public:
RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
~RendererMTL() override;
void reset() override;
void display() override;
void initGraphicsContext(SDL_Window* window) override;
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
void screenshot(const std::string& name) override;
void deinitGraphicsContext() override;
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
#endif
private:
CA::MetalLayer* metalLayer;
MTL::Device* device;
MTL::CommandQueue* commandQueue;
Metal::CommandEncoder commandEncoder;
// Libraries
MTL::Library* library;
// Caches
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
SurfaceCache<Metal::Texture, 256, true> textureCache;
Metal::BlitPipelineCache blitPipelineCache;
Metal::DrawPipelineCache drawPipelineCache;
Metal::DepthStencilCache depthStencilCache;
Metal::VertexBufferCache vertexBufferCache;
// Resources
MTL::SamplerState* nearestSampler;
MTL::SamplerState* linearSampler;
MTL::Texture* nullTexture;
MTL::DepthStencilState* defaultDepthStencilState;
Metal::LutTexture* lutLightingTexture;
Metal::LutTexture* lutFogTexture;
// Pipelines
MTL::RenderPipelineState* displayPipeline;
// MTL::RenderPipelineState* copyToLutTexturePipeline;
// Clears
std::map<MTL::Texture*, Color4> colorClearOps;
std::map<MTL::Texture*, float> depthClearOps;
std::map<MTL::Texture*, u8> stencilClearOps;
// Active state
MTL::CommandBuffer* commandBuffer = nullptr;
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
MTL::Texture* lastColorTexture = nullptr;
MTL::Texture* lastDepthTexture = nullptr;
// Debug
std::string nextRenderPassName;
void createCommandBufferIfNeeded() {
if (!commandBuffer) {
commandBuffer = commandQueue->commandBuffer();
}
}
void endRenderPass() {
if (renderCommandEncoder) {
renderCommandEncoder->endEncoding();
renderCommandEncoder = nullptr;
}
}
void beginRenderPassIfNeeded(
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
);
void commitCommandBuffer() {
if (renderCommandEncoder) {
renderCommandEncoder->endEncoding();
renderCommandEncoder->release();
renderCommandEncoder = nullptr;
}
if (commandBuffer) {
commandBuffer->commit();
// HACK
commandBuffer->waitUntilCompleted();
commandBuffer->release();
commandBuffer = nullptr;
}
}
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline void clearAttachment(
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
SetClearDataT setClearData
) {
bool beginRenderPass = (renderPassDescriptor == nullptr);
if (!renderPassDescriptor) {
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
}
AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture);
setClearData(attachment, clearData);
attachment->setLoadAction(MTL::LoadActionClear);
attachment->setStoreAction(MTL::StoreActionStore);
if (beginRenderPass) {
if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value)
beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
else
beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
}
}
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
inline bool clearAttachment(
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
GetAttachmentT getAttachment, SetClearDataT setClearData
) {
auto it = clearOps.find(texture);
if (it != clearOps.end()) {
clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
clearOps.erase(it);
return true;
}
if (renderPassDescriptor) {
AttachmentT* attachment = getAttachment(renderPassDescriptor);
attachment->setTexture(texture);
attachment->setLoadAction(MTL::LoadActionLoad);
attachment->setStoreAction(MTL::StoreActionStore);
}
return false;
}
bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
renderPassDescriptor, texture, colorClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); },
[](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); }
);
}
bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
renderPassDescriptor, texture, depthClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); },
[](auto attachment, auto& depth) { attachment->setClearDepth(depth); }
);
}
bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
renderPassDescriptor, texture, stencilClearOps,
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); },
[](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); }
);
}
std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
);
Metal::DepthStencilRenderTarget& getDepthRenderTarget();
Metal::Texture& getTexture(Metal::Texture& tex);
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
void bindTexturesToSlots();
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
void textureCopyImpl(
Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
const Math::Rect<u32>& destRect
);
};

View file

@ -11,7 +11,8 @@ struct Scheduler {
VBlank = 0, // End of frame event
UpdateTimers = 1, // Update kernel timer objects
RunDSP = 2, // Make the emulated DSP run for one audio frame
Panic = 3, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
SignalY2R = 3, // Signal that a Y2R conversion has finished
Panic = 4, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
TotalNumberOfEvents // How many event types do we have in total?
};
static constexpr usize totalNumberOfEvents = static_cast<usize>(EventType::TotalNumberOfEvents);

38
include/sdl_sensors.hpp Normal file
View file

@ -0,0 +1,38 @@
#pragma once
#include <cmath>
#include <glm/glm.hpp>
#include "helpers.hpp"
#include "services/hid.hpp"
// Convert SDL sensor readings to 3DS format
// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for
// rotation)
namespace Sensors::SDL {
// Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID
// Returns [pitch, roll, yaw]
static glm::vec3 convertRotation(glm::vec3 rotation) {
// Annoyingly, Android doesn't support the <numbers> header yet so we define pi ourselves
static constexpr double pi = 3.141592653589793;
// Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID
constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff;
// The axes are also inverted, so invert scale before the multiplication.
return rotation * -scale;
}
static glm::vec3 convertAcceleration(float* data) {
// Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930
// At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity.
// This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4.
static constexpr float accelMax = 9.f;
// We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too.
static constexpr float standardGravity = 9.80665f;
s16 x = std::clamp<s16>(s16(data[0] / accelMax * 930.f), -930, +930);
s16 y = std::clamp<s16>(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930);
s16 z = std::clamp<s16>(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930);
return glm::vec3(x, y, z);
}
} // namespace Sensors::SDL

View file

@ -8,6 +8,8 @@
#include "result/result.hpp"
class ACService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::AC;
Memory& mem;
MAKE_LOG_FUNCTION(log, acLogger)

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class ACTService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::ACT;
Memory& mem;
MAKE_LOG_FUNCTION(log, actLogger)
@ -15,7 +17,7 @@ class ACTService {
void generateUUID(u32 messagePointer);
void getAccountDataBlock(u32 messagePointer);
public:
public:
ACTService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class AMService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::AM;
Memory& mem;
MAKE_LOG_FUNCTION(log, amLogger)
@ -15,7 +17,7 @@ class AMService {
void getPatchTitleInfo(u32 messagePointer);
void listTitleInfo(u32 messagePointer);
public:
public:
AMService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -12,7 +12,8 @@
class Kernel;
enum class ConsoleModel : u32 {
Old3DS, New3DS
Old3DS,
New3DS,
};
// https://www.3dbrew.org/wiki/NS_and_APT_Services#Command
@ -41,6 +42,8 @@ namespace APT::Transitions {
}
class APTService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::APT;
Memory& mem;
Kernel& kernel;
@ -99,7 +102,7 @@ class APTService {
u32 screencapPostPermission;
public:
public:
APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -6,6 +6,8 @@
#include "result/result.hpp"
class BOSSService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::BOSS;
Memory& mem;
MAKE_LOG_FUNCTION(log, bossLogger)
@ -17,7 +19,7 @@ class BOSSService {
void getNewArrivalFlag(u32 messagePointer);
void getNsDataIdList(u32 messagePointer, u32 commandWord);
void getOptoutFlag(u32 messagePointer);
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
void getTaskIdList(u32 messagePointer);
void getTaskInfo(u32 messagePointer);
void getTaskServiceStatus(u32 messagePointer);
@ -35,7 +37,8 @@ class BOSSService {
void unregisterTask(u32 messagePointer);
s8 optoutFlag;
public:
public:
BOSSService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -12,6 +12,7 @@
class Kernel;
class CAMService {
using Handle = HorizonHandle;
using Event = std::optional<Handle>;
struct Port {

View file

@ -1,5 +1,6 @@
#pragma once
#include <optional>
#include "helpers.hpp"
#include "kernel_types.hpp"
#include "logger.hpp"
@ -9,6 +10,8 @@
class Kernel;
class CECDService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::CECD;
Memory& mem;
Kernel& kernel;
@ -20,7 +23,7 @@ class CECDService {
void getInfoEventHandle(u32 messagePointer);
void openAndRead(u32 messagePointer);
public:
public:
CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -1,5 +1,6 @@
#pragma once
#include <cstring>
#include "helpers.hpp"
#include "logger.hpp"
#include "memory.hpp"
@ -7,8 +8,10 @@
#include "result/result.hpp"
class CFGService {
using Handle = HorizonHandle;
Memory& mem;
CountryCodes country = CountryCodes::US; // Default to USA
CountryCodes country = CountryCodes::US; // Default to USA
MAKE_LOG_FUNCTION(log, cfgLogger)
void writeStringU16(u32 pointer, const std::u16string& string);
@ -27,12 +30,12 @@ class CFGService {
void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask);
public:
public:
enum class Type {
U, // cfg:u
I, // cfg:i
S, // cfg:s
NOR, // cfg:nor
U, // cfg:u
I, // cfg:i
S, // cfg:s
NOR, // cfg:nor
};
CFGService(Memory& mem) : mem(mem) {}

View file

@ -10,6 +10,8 @@
class Kernel;
class CSNDService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::CSND;
Memory& mem;
Kernel& kernel;
@ -30,7 +32,5 @@ class CSNDService {
void reset();
void handleSyncRequest(u32 messagePointer);
void setSharedMemory(u8* ptr) {
sharedMemory = ptr;
}
void setSharedMemory(u8* ptr) { sharedMemory = ptr; }
};

View file

@ -8,6 +8,8 @@
// Please forgive me for how everything in this file is named
// "dlp:SRVR" is not a nice name to work with
class DlpSrvrService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::DLP_SRVR;
Memory& mem;
MAKE_LOG_FUNCTION(log, dlpSrvrLogger)
@ -15,7 +17,7 @@ class DlpSrvrService {
// Service commands
void isChild(u32 messagePointer);
public:
public:
DlpSrvrService(Memory& mem) : mem(mem) {}
void reset();
void handleSyncRequest(u32 messagePointer);

View file

@ -14,6 +14,8 @@
class Kernel;
class DSPService {
using Handle = HorizonHandle;
Handle handle = KernelHandles::DSP;
Memory& mem;
Kernel& kernel;

View file

@ -0,0 +1,84 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h
#pragma once
#include <memory>
#include "helpers.hpp"
#include "swap.hpp"
namespace HLE::Fonts {
struct CFNT {
u8 magic[4];
u16_le endianness;
u16_le headerSize;
u32_le version;
u32_le fileSize;
u32_le numBlocks;
};
struct SectionHeader {
u8 magic[4];
u32_le sectionSize;
};
struct FINF {
u8 magic[4];
u32_le sectionSize;
u8 fontType;
u8 lineFeed;
u16_le alterCharIndex;
u8 default_width[3];
u8 encoding;
u32_le tglpOffset;
u32_le cwdhOffset;
u32_le cmapOffset;
u8 height;
u8 width;
u8 ascent;
u8 reserved;
};
struct TGLP {
u8 magic[4];
u32_le sectionSize;
u8 cellWidth;
u8 cellHeight;
u8 baselinePosition;
u8 maxCharacterWidth;
u32_le sheetSize;
u16_le numSheets;
u16_le sheetImageFormat;
u16_le numColumns;
u16_le numRows;
u16_le sheetWidth;
u16_le sheetHeight;
u32_le sheetDataOffset;
};
struct CMAP {
u8 magic[4];
u32_le sectionSize;
u16_le codeBegin;
u16_le codeEnd;
u16_le mappingMethod;
u16_le reserved;
u32_le nextCmapOffset;
};
struct CWDH {
u8 magic[4];
u32_le sectionSize;
u16_le startIndex;
u16_le endIndex;
u32_le nextCwdhOffset;
};
// Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will
// be auto-detected based on the file headers.
void relocateSharedFont(u8* sharedFont, u32 newAddress);
} // namespace HLE::Fonts

Some files were not shown because too many files have changed in this diff Show more