diff --git a/.github/gles.patch b/.github/gles.patch index f1dc2c73..548b243d 100644 --- a/.github/gles.patch +++ b/.github/gles.patch @@ -1,52 +1,3 @@ -diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp -index a11a6ffa..77486a09 100644 ---- a/src/core/renderer_gl/renderer_gl.cpp -+++ b/src/core/renderer_gl/renderer_gl.cpp -@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() { - } - - glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::updateLightingLUT() { -- gpu.lightingLUTDirty = false; -- std::array u16_lightinglut; -- -- for (int i = 0; i < gpu.lightingLUT.size(); i++) { -- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -- u16_lightinglut[i] = value * 65535 / 4095; -- } -- -- glActiveTexture(GL_TEXTURE0 + 3); -- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -- glActiveTexture(GL_TEXTURE0); -+ // gpu.lightingLUTDirty = false; -+ // std::array u16_lightinglut; -+ -+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) { -+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); -+ // u16_lightinglut[i] = value * 65535 / 4095; -+ // } -+ -+ // glActiveTexture(GL_TEXTURE0 + 3); -+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); -+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -+ // glActiveTexture(GL_TEXTURE0); - } - - void RendererGL::drawVertices(PICA::PrimType primType, std::span vertices) { diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag index 612671c8..1937f711 100644 --- a/src/host_shaders/opengl_display.frag @@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644 void main() { diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag -index f6fa6c55..bb88e278 100644 +index 9f07df0b..96a35afa 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,4 +1,5 @@ @@ -78,36 +29,29 @@ index f6fa6c55..bb88e278 100644 +#version 300 es +precision mediump float; - in vec3 v_tangent; - in vec3 v_normal; -@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable; - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; --uniform sampler1DArray u_tex_lighting_lut; -+// uniform sampler1DArray u_tex_lighting_lut; + in vec4 v_quaternion; + in vec4 v_colour; +@@ -41,8 +42,8 @@ vec3 normal; + const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); - uniform uint u_picaRegs[0x200 - 0x48]; + bool isSamplerEnabled(uint environment_id, uint lut_id) { +- uint index = 7 * environment_id + lut_id; +- uint arrayIndex = (index >> 5); ++ uint index = 7u * environment_id + lut_id; ++ uint arrayIndex = (index >> 5u); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; + } -@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) { - #define RR_LUT 6u +@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; + } - float lutLookup(uint lut, uint light, float value) { -- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -- if (lut == SP_LUT) lut = light + 8; -- return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; -+ // if (lut == SP_LUT) lut = light + 8; -+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r; -+ return 0.0; -+} -+ -+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead ++// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead +uint bitfieldExtractCompat(uint val, int off, int size) { + uint mask = uint((1 << size) - 1); + return uint(val >> off) & mask; - } - ++} ++ vec3 regToColor(uint reg) { // Normalization scale to convert from [0...255] to [0.0...1.0] const float scale = 1.0 / 255.0; @@ -117,89 +61,115 @@ index f6fa6c55..bb88e278 100644 } // Convert an arbitrary-width floating point literal to an f32 -@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 view = normalize(v_view); +@@ -201,7 +208,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; +- } else if (lut_id <= 6) { ++ } else if (lut_id <= 6u) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { +@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + +- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { ++ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + +- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); ++ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; +- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); ++ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); +@@ -243,9 +250,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually +- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); +- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); +- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); ++ int se_x = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 0, 13)); ++ int se_y = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 16, 13)); ++ int se_z = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_HIGH), 0, 13)); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; +@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light + } + + // 0 = enabled +- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse +- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { ++ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); +@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + // Implements the following algorthm: https://mathb.in/26766 + void calcLighting(out vec4 primary_color, out vec4 secondary_color) { uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } -@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - bool error_unimpl = false; +@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); + +- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); ++ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bump_mode) { +@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + +- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); +- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; ++ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4); ++ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { -- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); -+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); +- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); ++ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); -@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); +@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); - } -@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + light_vector = light_position + v_view; } - for (int c = 0; c < 7; c++) { -- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { -+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; - -- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); -+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) -@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), -+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { -@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; -- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); -+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - -- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); -+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; -@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - float NdotL = dot(normal, light_vector); // Li dot N +@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -207,19 +177,40 @@ index f6fa6c55..bb88e278 100644 NdotL = max(0.0, NdotL); else NdotL = abs(NdotL); -@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + + float geometric_factor; +- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; +- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; ++ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; ++ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } + + float distance_attenuation = 1.0; +- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { +- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); +- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); ++ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { ++ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); ++ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); +@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert -index a25d7a6d..7cf40398 100644 +index 057f9a88..dc735ced 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -1,4 +1,6 @@ @@ -230,7 +221,7 @@ index a25d7a6d..7cf40398 100644 layout(location = 0) in vec4 a_coords; layout(location = 1) in vec4 a_quaternion; -@@ -20,7 +22,7 @@ out vec2 v_texcoord2; +@@ -18,7 +20,7 @@ out vec2 v_texcoord2; flat out vec4 v_textureEnvColor[6]; flat out vec4 v_textureEnvBufferColor; @@ -239,7 +230,7 @@ index a25d7a6d..7cf40398 100644 // TEV uniforms uniform uint u_textureEnvColor[6]; -@@ -93,6 +95,6 @@ void main() { +@@ -81,8 +83,8 @@ void main() { ); // There's also another, always-on clipping plane based on vertex z @@ -247,16 +238,20 @@ index a25d7a6d..7cf40398 100644 - gl_ClipDistance[1] = dot(clipData, a_coords); + // gl_ClipDistance[0] = -a_coords.z; + // gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp -index f368f573..5ead7f63 100644 +index 607815fa..cbfcc096 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp -@@ -520,21 +520,21 @@ namespace OpenGL { +@@ -602,22 +602,22 @@ namespace OpenGL { + static void disableScissor() { glDisable(GL_SCISSOR_TEST); } static void enableBlend() { glEnable(GL_BLEND); } static void disableBlend() { glDisable(GL_BLEND); } - static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } +- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); } - static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); } ++ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ } + static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ } static void enableDepth() { glEnable(GL_DEPTH_TEST); } static void disableDepth() { glDisable(GL_DEPTH_TEST); } diff --git a/.github/mac-bundle-qt.sh b/.github/mac-bundle-qt.sh index e18be8a5..f8083936 100644 --- a/.github/mac-bundle-qt.sh +++ b/.github/mac-bundle-qt.sh @@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec" # Construct the app iconset. mkdir alber.iconset -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png iconutil --convert icns alber.iconset # Set up the .app directory diff --git a/.github/mac-bundle.sh b/.github/mac-bundle.sh index 314b30f7..3349d6d3 100755 --- a/.github/mac-bundle.sh +++ b/.github/mac-bundle.sh @@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec" # Construct the app iconset. mkdir alber.iconset -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png -convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png +convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png iconutil --convert icns alber.iconset # Set up the .app directory diff --git a/.github/workflows/Android_Build.yml b/.github/workflows/Android_Build.yml index 11811f8b..b7e64f5f 100644 --- a/.github/workflows/Android_Build.yml +++ b/.github/workflows/Android_Build.yml @@ -8,7 +8,7 @@ on: jobs: x64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: @@ -73,7 +73,7 @@ jobs: ./src/pandroid/app/build/outputs/apk/${{ env.BUILD_TYPE }}/app-${{ env.BUILD_TYPE }}.apk arm64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: diff --git a/.github/workflows/HTTP_Build.yml b/.github/workflows/HTTP_Build.yml index 7bfe9c7f..c4f7cfee 100644 --- a/.github/workflows/HTTP_Build.yml +++ b/.github/workflows/HTTP_Build.yml @@ -16,10 +16,10 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive diff --git a/.github/workflows/Hydra_Build.yml b/.github/workflows/Hydra_Build.yml index a19974fb..785e0e4a 100644 --- a/.github/workflows/Hydra_Build.yml +++ b/.github/workflows/Hydra_Build.yml @@ -15,7 +15,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -32,18 +32,33 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - - name: Upload core - uses: actions/upload-artifact@v2 + - name: Upload Hydra core + uses: actions/upload-artifact@v4 with: - name: Windows core + name: Windows Hydra core path: '${{github.workspace}}/build/${{ env.BUILD_TYPE }}/Alber.dll' + - name: Configure CMake (Again) + run: | + rm -r -fo ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: Windows Libretro core + path: | + ${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info MacOS: runs-on: macos-13 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -61,22 +76,38 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: MacOS core + name: MacOS Hydra core path: '${{github.workspace}}/build/libAlber.dylib' + - name: Configure CMake (Again) + run: | + rm -rf ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: MacOS Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.dylib + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info + Linux: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Install newer Clang run: | @@ -98,22 +129,38 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Linux core + name: Linux Hydra core path: '${{github.workspace}}/build/libAlber.so' + - name: Configure CMake (Again) + run: | + rm -rf ${{github.workspace}}/build + cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON + + - name: Build (Again) + run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} + + - name: Upload Libretro core + uses: actions/upload-artifact@v4 + with: + name: Linux Libretro core + path: | + ${{github.workspace}}/build/panda3ds_libretro.so + ${{github.workspace}}/docs/libretro/panda3ds_libretro.info + Android-x64: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Setup Vulkan SDK uses: humbletim/setup-vulkan-sdk@v1.2.0 @@ -129,7 +176,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload core - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: Android core + name: Android Hydra core path: '${{github.workspace}}/build/libAlber.so' diff --git a/.github/workflows/Linux_AppImage_Build.yml b/.github/workflows/Linux_AppImage_Build.yml index 507187a3..3c5af88a 100644 --- a/.github/workflows/Linux_AppImage_Build.yml +++ b/.github/workflows/Linux_AppImage_Build.yml @@ -16,15 +16,15 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages - run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 + run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev - name: Install newer Clang run: | @@ -33,11 +33,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. @@ -52,7 +52,7 @@ jobs: run: ./.github/linux-appimage.sh - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './Alber-x86_64.AppImage' diff --git a/.github/workflows/Linux_Build.yml b/.github/workflows/Linux_Build.yml index 78e5cc5a..61f7eafa 100644 --- a/.github/workflows/Linux_Build.yml +++ b/.github/workflows/Linux_Build.yml @@ -16,15 +16,15 @@ jobs: # well on Windows or Mac. You can convert this to a matrix build if you need # cross-platform coverage. # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages - run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev + run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libwayland-dev - name: Install newer Clang run: | @@ -49,7 +49,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './build/Alber' diff --git a/.github/workflows/MacOS_Build.yml b/.github/workflows/MacOS_Build.yml index f6fafde9..76b75bd4 100644 --- a/.github/workflows/MacOS_Build.yml +++ b/.github/workflows/MacOS_Build.yml @@ -19,7 +19,7 @@ jobs: runs-on: macos-13 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -52,7 +52,7 @@ jobs: run: zip -r Alber Alber.app - name: Upload MacOS App - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: MacOS Alber App Bundle path: 'Alber.zip' diff --git a/.github/workflows/Qt_Build.yml b/.github/workflows/Qt_Build.yml index 5e622c54..3b846a27 100644 --- a/.github/workflows/Qt_Build.yml +++ b/.github/workflows/Qt_Build.yml @@ -15,7 +15,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -45,7 +45,7 @@ jobs: windeployqt --dir upload upload/Alber.exe - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Windows executable path: upload @@ -54,7 +54,7 @@ jobs: runs-on: macos-13 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -90,23 +90,22 @@ jobs: run: zip -r Alber Alber.app - name: Upload MacOS App - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: MacOS Alber App Bundle path: 'Alber.zip' Linux: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive - name: Install misc packages run: | - sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev - sudo add-apt-repository -y ppa:savoury1/qt-6-2 + sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev sudo apt update sudo apt install qt6-base-dev qt6-base-private-dev @@ -117,11 +116,11 @@ jobs: sudo ./llvm.sh 17 - name: Setup Vulkan SDK - run: | - wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list - sudo apt update - sudo apt install vulkan-sdk + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: latest + vulkan-use-cache: true + vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang - name: Configure CMake run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON @@ -135,7 +134,7 @@ jobs: ./.github/linux-appimage-qt.sh - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Linux executable path: './Alber-x86_64.AppImage' diff --git a/.github/workflows/Windows_Build.yml b/.github/workflows/Windows_Build.yml index a06889eb..5497c3ef 100644 --- a/.github/workflows/Windows_Build.yml +++ b/.github/workflows/Windows_Build.yml @@ -19,7 +19,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Fetch submodules run: git submodule update --init --recursive @@ -40,7 +40,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} - name: Upload executable - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Windows executable path: './build/${{ env.BUILD_TYPE }}/Alber.exe' diff --git a/.gitignore b/.gitignore index 528462ad..817786a3 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,9 @@ fb.bat *.elf *.smdh +# Compiled Metal shader files +*.ir +*.metallib + config.toml CMakeSettings.json diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..740ccf0c --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,130 @@ +# DESCRIPTION: GitLab CI/CD for libRetro (NOT FOR GitLab-proper) + +############################################################################## +################################# BOILERPLATE ################################ +############################################################################## + +# Core definitions +.core-defs: + variables: + GIT_SUBMODULE_STRATEGY: recursive + CORENAME: panda3ds + CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF + +# Inclusion templates, required for the build to work + +include: + ################################## DESKTOPS ################################ + # Linux + - project: 'libretro-infrastructure/ci-templates' + file: '/linux-cmake.yml' + + # Windows + - project: 'libretro-infrastructure/ci-templates' + file: '/windows-cmake-mingw.yml' + + # MacOS + - project: 'libretro-infrastructure/ci-templates' + file: 'osx-cmake-x86.yml' + + # MacOS + - project: 'libretro-infrastructure/ci-templates' + file: 'osx-cmake-arm64.yml' + + ################################## CELLULAR ################################ + # Android + - project: 'libretro-infrastructure/ci-templates' + file: '/android-cmake.yml' + + # iOS + - project: 'libretro-infrastructure/ci-templates' + file: '/ios-cmake.yml' + +# Stages for building +stages: + - build-prepare + - build-static + - build-shared + +############################################################################## +#################################### STAGES ################################## +############################################################################## +# +################################### DESKTOPS ################################# +# Linux 64-bit +libretro-build-linux-x64: + image: $CI_SERVER_HOST:5050/libretro-infrastructure/libretro-build-amd64-ubuntu:latest + before_script: + - export NUMPROC=$(($(nproc)/5)) + - sudo apt-get update -qy + - sudo apt-get install -qy software-properties-common + - sudo add-apt-repository -y ppa:savoury1/build-tools + - sudo add-apt-repository -y ppa:savoury1/gcc-defaults-12 + - sudo apt-get update -qy + - sudo apt-get install -qy cmake gcc-12 g++-12 + variables: + CC: /usr/bin/gcc-12 + CXX: /usr/bin/g++-12 + extends: + - .libretro-linux-cmake-x86_64 + - .core-defs + +# Windows 64-bit +libretro-build-windows-x64: + extends: + - .libretro-windows-cmake-x86_64 + - .core-defs + +# MacOS 64-bit +libretro-build-osx-x64: + tags: + - mac-apple-silicon + variables: + CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCRYPTOPP_AMD64=1 + extends: + - .libretro-osx-cmake-x86 + - .core-defs + +# MacOS arm 64-bit +libretro-build-osx-arm64: + tags: + - mac-apple-silicon + extends: + - .libretro-osx-cmake-arm64 + - .core-defs + +################################### CELLULAR ################################# +# Android ARMv7a +#android-armeabi-v7a: +# extends: +# - .libretro-android-cmake-armeabi-v7a +# - .core-defs + +# Android ARMv8a +# android-arm64-v8a: +# extends: +# - .libretro-android-cmake-arm64-v8a +# - .core-defs + +# Android 64-bit x86 +# android-x86_64: +# extends: +# - .libretro-android-cmake-x86_64 +# - .core-defs + +# Android 32-bit x86 +# android-x86: +# extends: +# - .libretro-android-cmake-x86 +# - .core-defs + +# iOS +# libretro-build-ios-arm64: +# extends: +# - .libretro-ios-cmake-arm64 +# - .core-defs +# variables: +# CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DBUILD_PLAY=OFF -DENABLE_AMAZON_S3=off -DBUILD_TESTS=OFF -DCMAKE_TOOLCHAIN_FILE=deps/Dependencies/cmake-ios/ios.cmake -DTARGET_IOS=ON +# LIBNAME: ${CORENAME}_libretro_ios.dylib + +################################### CONSOLES ################################# diff --git a/.gitmodules b/.gitmodules index 1f1d11fc..f1a70f41 100644 --- a/.gitmodules +++ b/.gitmodules @@ -70,3 +70,15 @@ [submodule "third_party/capstone"] path = third_party/capstone url = https://github.com/capstone-engine/capstone +[submodule "third_party/hips"] + path = third_party/hips + url = https://github.com/wheremyfoodat/Hips +[submodule "third_party/metal-cpp"] + path = third_party/metal-cpp + url = https://github.com/Panda3DS-emu/metal-cpp +[submodule "third_party/fmt"] + path = third_party/fmt + url = https://github.com/fmtlib/fmt +[submodule "third_party/fdk-aac"] + path = third_party/fdk-aac + url = https://github.com/Panda3DS-emu/fdk-aac/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 7db1ce33..cb7089ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,19 +19,37 @@ endif() project(Alber) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") if(APPLE) enable_language(OBJC) endif() +# Enable RC support in order to use resource files for application icons +if(WIN32) + enable_language(RC) + set(APP_RESOURCES docs/img/windows_icon.rc) +endif() + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security") -endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-invalid-offsetof") +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size") +endif() + +if(ANDROID) + set(DEFAULT_OPENGL_PROFILE OpenGLES) +else() + set(DEFAULT_OPENGL_PROFILE OpenGL) +endif() option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON) option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF) option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON) option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON) +option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON) option(ENABLE_LTO "Enable link-time optimization" OFF) option(ENABLE_TESTS "Compile unit-tests" OFF) option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF) @@ -39,12 +57,65 @@ option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON) option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON) option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) +option(ENABLE_GIT_VERSIONING "Enables querying git for the emulator version" ON) option(BUILD_HYDRA_CORE "Build a Hydra core" OFF) +option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF) +option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON) +option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF) + +set(OPENGL_PROFILE ${DEFAULT_OPENGL_PROFILE} CACHE STRING "OpenGL profile to use if OpenGL is enabled. Valid values are 'OpenGL' and 'OpenGLES'.") +set_property(CACHE OPENGL_PROFILE PROPERTY STRINGS OpenGL OpenGLES) + +if(ENABLE_OPENGL AND (OPENGL_PROFILE STREQUAL "OpenGLES")) + message(STATUS "Building with OpenGLES support") + add_compile_definitions(USING_GLES) +endif() if(BUILD_HYDRA_CORE) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() +if(BUILD_LIBRETRO_CORE) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_compile_definitions(__LIBRETRO__) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD) + # Disable stack buffer overflow checks in user builds + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-") +endif() + +# Generate versioning files +find_package(Git) +set(PANDA3DS_VERSION "0.8") + +if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/version.hpp.in) + file(WRITE ${CMAKE_BINARY_DIR}/include/version.hpp.in "#define PANDA3DS_VERSION \"\${PANDA3DS_VERSION}\"") +endif() + +if(GIT_FOUND AND ENABLE_GIT_VERSIONING) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 + OUTPUT_VARIABLE PANDA3DS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE + ) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags + OUTPUT_VARIABLE git_version_tag OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT PANDA3DS_VERSION STREQUAL git_version_tag) + execute_process( + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --always --abbrev=7 + OUTPUT_VARIABLE git_version_rev OUTPUT_STRIP_TRAILING_WHITESPACE + ) + set(PANDA3DS_VERSION "${PANDA3DS_VERSION}.${git_version_rev}") + unset(git_version_rev) + endif() + string(REGEX REPLACE "^v" "" PANDA3DS_VERSION "${PANDA3DS_VERSION}") + unset(git_version_tag) +endif() +configure_file(${CMAKE_BINARY_DIR}/include/version.hpp.in ${CMAKE_BINARY_DIR}/include/version.hpp) +include_directories(${CMAKE_BINARY_DIR}/include/) + add_library(AlberCore STATIC) include_directories(${PROJECT_SOURCE_DIR}/include/) @@ -52,6 +123,7 @@ include_directories(${PROJECT_SOURCE_DIR}/include/kernel) include_directories(${FMT_INCLUDE_DIR}) include_directories(third_party/boost/) include_directories(third_party/elfio/) +include_directories(third_party/hips/include/) include_directories(third_party/imgui/) include_directories(third_party/dynarmic/src) include_directories(third_party/cryptopp/) @@ -82,10 +154,13 @@ if (NOT ANDROID) target_link_libraries(AlberCore PUBLIC SDL2-static) endif() +add_subdirectory(third_party/fmt) add_subdirectory(third_party/toml11) include_directories(${SDL2_INCLUDE_DIR}) include_directories(third_party/toml11) include_directories(third_party/glm) +include_directories(third_party/renderdoc) +include_directories(third_party/duckstation) add_subdirectory(third_party/cmrc) @@ -144,6 +219,18 @@ else() set(HOST_ARM64 FALSE) endif() +# Enable SSE4.1 if it's not explicitly disabled +# Annoyingly, we can't easily do this if we're using MSVC cause there's no SSE4.1 flag, only SSE4.1 +if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DISABLE_SSE4 AND HOST_X64) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") +endif() + +if(ENABLE_RENDERDOC_API) + find_package(RenderDoc 1.6.0 MODULE REQUIRED) + add_compile_definitions(PANDA3DS_ENABLE_RENDERDOC) +endif() + if(HOST_X64 OR HOST_ARM64) set(DYNARMIC_TESTS OFF) #set(DYNARMIC_NO_BUNDLED_FMT ON) @@ -155,6 +242,7 @@ else() endif() add_subdirectory(third_party/teakra EXCLUDE_FROM_ALL) +add_subdirectory(third_party/fdk-aac) set(CAPSTONE_ARCHITECTURE_DEFAULT OFF) set(CAPSTONE_ARM_SUPPORT ON) @@ -166,7 +254,7 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp src/renderer.cpp src/core/renderer_null/renderer_null.cpp src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp - src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp + src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp src/renderdoc.cpp ) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp @@ -187,12 +275,13 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services src/core/services/act.cpp src/core/services/nfc.cpp src/core/services/dlp_srvr.cpp src/core/services/ir_user.cpp src/core/services/http.cpp src/core/services/soc.cpp src/core/services/ssl.cpp src/core/services/news_u.cpp src/core/services/amiibo_device.cpp - src/core/services/csnd.cpp src/core/services/nwm_uds.cpp + src/core/services/csnd.cpp src/core/services/nwm_uds.cpp src/core/services/fonts.cpp ) set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp + src/core/PICA/shader_decompiler.cpp src/core/PICA/draw_acceleration.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -205,7 +294,7 @@ set(APPLET_SOURCE_FILES src/core/applets/applet.cpp src/core/applets/mii_selecto src/core/applets/error_applet.cpp ) set(AUDIO_SOURCE_FILES src/core/audio/dsp_core.cpp src/core/audio/null_core.cpp src/core/audio/teakra_core.cpp - src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp + src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp src/core/audio/aac_decoder.cpp ) set(RENDERER_SW_SOURCE_FILES src/core/renderer_sw/renderer_sw.cpp) @@ -224,7 +313,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp - include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp + include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp @@ -235,21 +324,24 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp - include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp + include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp + include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp + include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp + include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp ) cmrc_add_resource_library( resources_console_fonts NAMESPACE ConsoleFonts WHENCE "src/core/services/fonts/" - "src/core/services/fonts/CitraSharedFontUSRelocated.bin" + "src/core/services/fonts/SharedFontReplacement.bin" ) set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp @@ -275,7 +367,6 @@ if(ENABLE_LUAJIT AND NOT ANDROID) endif() if(ENABLE_QT_GUI) - include_directories(third_party/duckstation) set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/window_info.cpp third_party/duckstation/gl/context.cpp) if(APPLE) @@ -308,7 +399,7 @@ if(ENABLE_OPENGL) set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp - include/renderer_gl/gl_state.hpp + include/renderer_gl/gl_state.hpp include/renderer_gl/gl_driver.hpp ) set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp @@ -318,6 +409,8 @@ if(ENABLE_OPENGL) src/host_shaders/opengl_fragment_shader.frag ) + set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/stream_buffer.cpp) + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES}) source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) @@ -338,8 +431,8 @@ endif() if(ENABLE_VULKAN) find_package( - Vulkan 1.3.206 REQUIRED - COMPONENTS glslangValidator + Vulkan REQUIRED + COMPONENTS glslang ) set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp @@ -382,7 +475,7 @@ if(ENABLE_VULKAN) add_custom_command( OUTPUT ${HOST_SHADER_SPIRV} COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/" - COMMAND Vulkan::glslangValidator ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} + COMMAND glslang ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV} DEPENDS ${HOST_SHADER_SOURCE} ) list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} ) @@ -400,14 +493,88 @@ if(ENABLE_VULKAN) target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk) endif() +if(ENABLE_METAL AND APPLE) + set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp + include/renderer_mtl/mtl_depth_stencil_cache.hpp + include/renderer_mtl/mtl_blit_pipeline_cache.hpp + include/renderer_mtl/mtl_draw_pipeline_cache.hpp + include/renderer_mtl/mtl_render_target.hpp + include/renderer_mtl/mtl_texture.hpp + include/renderer_mtl/mtl_vertex_buffer_cache.hpp + include/renderer_mtl/mtl_lut_texture.hpp + include/renderer_mtl/mtl_command_encoder.hpp + include/renderer_mtl/mtl_common.hpp + include/renderer_mtl/pica_to_mtl.hpp + include/renderer_mtl/objc_helper.hpp + ) + + set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp + src/core/renderer_mtl/renderer_mtl.cpp + src/core/renderer_mtl/mtl_texture.cpp + src/core/renderer_mtl/mtl_etc1.cpp + src/core/renderer_mtl/mtl_lut_texture.cpp + src/core/renderer_mtl/objc_helper.mm + src/host_shaders/metal_shaders.metal + src/host_shaders/metal_blit.metal + #src/host_shaders/metal_copy_to_lut_texture.metal + ) + + set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES}) + source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES}) + + set(RENDERER_MTL_HOST_SHADERS_SOURCES) + function (add_metal_shader SHADER) + set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal") + set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir") + set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib") + # TODO: only include sources in debug builds + add_custom_command( + OUTPUT ${SHADER_IR} + COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE} + DEPENDS ${SHADER_SOURCE} + VERBATIM) + add_custom_command( + OUTPUT ${SHADER_METALLIB} + COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR} + DEPENDS ${SHADER_IR} + VERBATIM) + set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB}) + endfunction() + + add_metal_shader(metal_shaders) + add_metal_shader(metal_blit) + #add_metal_shader(metal_copy_to_lut_texture) + + add_custom_target( + compile_msl_shaders + DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES} + ) + + cmrc_add_resource_library( + resources_renderer_mtl + NAMESPACE RendererMTL + WHENCE "src/host_shaders/" + "src/host_shaders/metal_shaders.metallib" + "src/host_shaders/metal_blit.metallib" + #"src/host_shaders/metal_copy_to_lut_texture.metallib" + ) + add_dependencies(resources_renderer_mtl compile_msl_shaders) + + target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES}) + target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1") + target_include_directories(AlberCore PRIVATE third_party/metal-cpp) + # TODO: check if all of them are needed + target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl) +endif() + source_group("Header Files\\Core" FILES ${HEADER_FILES}) -set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} +set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES}) target_sources(AlberCore PRIVATE ${ALL_SOURCES}) -target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra) -target_link_libraries(AlberCore PUBLIC glad capstone) +target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra fdk-aac) +target_link_libraries(AlberCore PUBLIC glad capstone fmt::fmt) if(ENABLE_DISCORD_RPC AND NOT ANDROID) target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_DISCORD_RPC=1") @@ -438,7 +605,7 @@ else() target_compile_definitions(AlberCore PUBLIC "PANDA3DS_FRONTEND_SDL=1") endif() -if(NOT BUILD_HYDRA_CORE) +if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) add_executable(Alber) if(ENABLE_QT_GUI) @@ -447,11 +614,16 @@ if(NOT BUILD_HYDRA_CORE) message(FATAL_ERROR "Qt frontend requires OpenGL") endif() + option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF) + set(QT_LANGUAGES docs/translations) + set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp - ) + src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp + ) set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp + include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp ) source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES}) @@ -481,27 +653,47 @@ if(NOT BUILD_HYDRA_CORE) endif() endif() + # Generates an en.ts file for translations + # To update the file, use cmake --build --target Alber_lupdate + if(GENERATE_QT_TRANSLATION) + find_package(Qt6 REQUIRED COMPONENTS LinguistTools) + qt_add_lupdate(Alber TS_FILES ${QT_LANGUAGES}/en.ts + SOURCES ${FRONTEND_SOURCE_FILES} + INCLUDE_DIRECTORIES ${FRONTEND_HEADER_FILES} + NO_GLOBAL_TARGET + ) + endif() + qt_add_resources(AlberCore "app_images" PREFIX "/" FILES - docs/img/rsob_icon.png docs/img/rstarstruck_icon.png + docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png ) else() set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp) - set(FRONTEND_HEADER_FILES "") + set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp") endif() target_link_libraries(Alber PRIVATE AlberCore) - target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES}) + target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES} ${APP_RESOURCES}) elseif(BUILD_HYDRA_CORE) target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1) include_directories(third_party/hydra_core/include) add_library(Alber SHARED src/hydra_core.cpp) target_link_libraries(Alber PUBLIC AlberCore) +elseif(BUILD_LIBRETRO_CORE) + include_directories(third_party/libretro/include) + add_library(panda3ds_libretro SHARED src/libretro_core.cpp) + target_link_libraries(panda3ds_libretro PUBLIC AlberCore) + set_target_properties(panda3ds_libretro PROPERTIES PREFIX "") endif() if(ENABLE_LTO OR ENABLE_USER_BUILD) - set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + if (NOT BUILD_LIBRETRO_CORE) + set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + set_target_properties(panda3ds_libretro PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() endif() if(ENABLE_TESTS) diff --git a/cmake/FindRenderDoc.cmake b/cmake/FindRenderDoc.cmake new file mode 100644 index 00000000..c00a0888 --- /dev/null +++ b/cmake/FindRenderDoc.cmake @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +# SPDX-License-Identifier: GPL-2.0-or-later + +set(RENDERDOC_INCLUDE_DIR third_party/renderdoc) + +if (RENDERDOC_INCLUDE_DIR AND EXISTS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h") + file(STRINGS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h" RENDERDOC_VERSION_LINE REGEX "typedef struct RENDERDOC_API") + string(REGEX REPLACE ".*typedef struct RENDERDOC_API_([0-9]+)_([0-9]+)_([0-9]+).*" "\\1.\\2.\\3" RENDERDOC_VERSION "${RENDERDOC_VERSION_LINE}") + unset(RENDERDOC_VERSION_LINE) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(RenderDoc + REQUIRED_VARS RENDERDOC_INCLUDE_DIR + VERSION_VAR RENDERDOC_VERSION +) + +if (RenderDoc_FOUND AND NOT TARGET RenderDoc::API) + add_library(RenderDoc::API INTERFACE IMPORTED) + set_target_properties(RenderDoc::API PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${RENDERDOC_INCLUDE_DIR}" + ) +endif() + +mark_as_advanced(RENDERDOC_INCLUDE_DIR) \ No newline at end of file diff --git a/docs/3ds/accelerometer_readings/readings_flat_1.png b/docs/3ds/accelerometer_readings/readings_flat_1.png new file mode 100644 index 00000000..b7a425fc Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_flat_1.png differ diff --git a/docs/3ds/accelerometer_readings/readings_flat_2.png b/docs/3ds/accelerometer_readings/readings_flat_2.png new file mode 100644 index 00000000..b23c1102 Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_flat_2.png differ diff --git a/docs/3ds/accelerometer_readings/readings_shaking_1.png b/docs/3ds/accelerometer_readings/readings_shaking_1.png new file mode 100644 index 00000000..91279149 Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_shaking_1.png differ diff --git a/docs/3ds/accelerometer_readings/readings_shaking_2.png b/docs/3ds/accelerometer_readings/readings_shaking_2.png new file mode 100644 index 00000000..551e7d2e Binary files /dev/null and b/docs/3ds/accelerometer_readings/readings_shaking_2.png differ diff --git a/docs/3ds/lighting.md b/docs/3ds/lighting.md new file mode 100644 index 00000000..8b6b9885 --- /dev/null +++ b/docs/3ds/lighting.md @@ -0,0 +1,79 @@ +## Info on the lighting implementation + +### Missing shadow attenuation +Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct +their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows. + +### Missing bump mapping +Bump mapping also samples a texture unit, most likely doesn't need render to texture however may need better texture sampling +implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things, +namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation. + +Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl. + +### samplerEnabledBitfields +Holds the enabled state of the lighting samples for various PICA configurations +As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0 + +```c +const bool samplerEnabled[9 * 7] = bool[9 * 7]( + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true // Configuration 8: All +); +``` + +The above has been condensed to two uints for performance reasons. +You can confirm they are the same by running the following: +```c +const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu }; +for (int i = 0; i < 9 * 7; i++) { + unsigned arrayIndex = (i >> 5); + bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u; + if (samplerEnabled[i] == b) { + printf("%d: happy\n", i); + } else { + printf("%d: unhappy\n", i); + } +} +``` + +### lightLutLookup +lut_id is one of these values +0 D0 +1 D1 +2 SP +3 FR +4 RB +5 RG +6 RR + +lut_index on the other hand represents the actual index of the LUT in the texture +u_tex_luts has 24 LUTs for lighting and they are used like so: +0 D0 +1 D1 +2 is missing because SP uses LUTs 8-15 +3 FR +4 RB +5 RG +6 RR +8-15 SP0-7 +16-23 DA0-7, but this is not handled in this function as the lookup is a bit different + +The light environment configuration controls which LUTs are available for use +If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1 +If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue. + +### Distance attenuation +Distance attenuation is computed differently from the other factors, for example +it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use +GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the +fragment and the distance attenuation scale and bias to calculate where in the LUT to look up. +See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE \ No newline at end of file diff --git a/docs/img/KirbyAndroid.png b/docs/img/KirbyAndroid.png new file mode 100644 index 00000000..05e8b466 Binary files /dev/null and b/docs/img/KirbyAndroid.png differ diff --git a/docs/img/alber-icon.ico b/docs/img/mac_icon.ico similarity index 100% rename from docs/img/alber-icon.ico rename to docs/img/mac_icon.ico diff --git a/docs/img/rpog_icon.png b/docs/img/rpog_icon.png new file mode 100644 index 00000000..b5426aed Binary files /dev/null and b/docs/img/rpog_icon.png differ diff --git a/docs/img/rsyn_icon.png b/docs/img/rsyn_icon.png new file mode 100644 index 00000000..684c19b6 Binary files /dev/null and b/docs/img/rsyn_icon.png differ diff --git a/docs/img/windows_alt_icon.ico b/docs/img/windows_alt_icon.ico new file mode 100644 index 00000000..aa3593c3 Binary files /dev/null and b/docs/img/windows_alt_icon.ico differ diff --git a/docs/img/windows_icon.ico b/docs/img/windows_icon.ico new file mode 100644 index 00000000..340f251e Binary files /dev/null and b/docs/img/windows_icon.ico differ diff --git a/docs/img/windows_icon.rc b/docs/img/windows_icon.rc new file mode 100644 index 00000000..22dc105e --- /dev/null +++ b/docs/img/windows_icon.rc @@ -0,0 +1 @@ +AlberIcon ICON "windows_icon.ico" \ No newline at end of file diff --git a/docs/libretro/panda3ds_libretro.info b/docs/libretro/panda3ds_libretro.info new file mode 100644 index 00000000..40df7e22 --- /dev/null +++ b/docs/libretro/panda3ds_libretro.info @@ -0,0 +1,34 @@ +# Software Information +display_name = "Nintendo - 3DS (Panda3DS)" +authors = "Panda3DS Authors (tm)" +supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app" +corename = "Panda3DS" +categories = "Emulator" +license = "GPLv3" +permissions = "" +display_version = "Git" + +# Hardware Information +manufacturer = "Nintendo" +systemname = "3DS" +systemid = "3ds" + +# Libretro Information +database = "Nintendo - Nintendo 3DS" +supports_no_game = "false" +savestate = "true" +savestate_features = "basic" +cheats = "false" +input_descriptors = "true" +memory_descriptors = "false" +libretro_saves = "true" +core_options = "true" +core_options_version = "1.0" +load_subsystem = "false" +hw_render = "true" +required_hw_api = "OpenGL Core >= 4.1" +needs_fullpath = "true" +disk_control = "false" +is_experimental = "true" + +description = "Panda3DS !" diff --git a/include/PICA/draw_acceleration.hpp b/include/PICA/draw_acceleration.hpp new file mode 100644 index 00000000..6a66cdc1 --- /dev/null +++ b/include/PICA/draw_acceleration.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include + +#include "helpers.hpp" + +namespace PICA { + struct DrawAcceleration { + static constexpr u32 maxAttribCount = 16; + static constexpr u32 maxLoaderCount = 12; + + struct AttributeInfo { + u32 offset; + u32 stride; + + u8 type; + u8 componentCount; + + std::array fixedValue; // For fixed attributes + }; + + struct Loader { + // Data to upload for this loader + u8* data; + usize size; + }; + + u8* indexBuffer; + + // Minimum and maximum index in the index buffer for a draw call + u16 minimumIndex, maximumIndex; + u32 totalAttribCount; + u32 totalLoaderCount; + u32 enabledAttributeMask; + u32 fixedAttributes; + u32 vertexDataSize; + + std::array attributeInfo; + std::array loaders; + + bool canBeAccelerated; + bool indexed; + bool useShortIndices; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/dynapica/pica_recs.hpp b/include/PICA/dynapica/pica_recs.hpp index acfd226e..eb0cf404 100644 --- a/include/PICA/dynapica/pica_recs.hpp +++ b/include/PICA/dynapica/pica_recs.hpp @@ -2,7 +2,7 @@ #include "helpers.hpp" #include "vertex_loader_rec.hpp" -// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly) +// Common file for our PICA JITs (From PICA shader -> CPU assembly) namespace Dynapica { #ifdef PANDA3DS_DYNAPICA_SUPPORTED diff --git a/include/PICA/dynapica/shader_rec.hpp b/include/PICA/dynapica/shader_rec.hpp index 2dabc128..a242d02f 100644 --- a/include/PICA/dynapica/shader_rec.hpp +++ b/include/PICA/dynapica/shader_rec.hpp @@ -22,8 +22,11 @@ class ShaderJIT { ShaderCache cache; #endif + bool accurateMul = false; public: + void setAccurateMul(bool value) { accurateMul = value; } + #ifdef PANDA3DS_SHADER_JIT_SUPPORTED // Call this before starting to process a batch of vertices // This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader @@ -36,11 +39,11 @@ class ShaderJIT { static constexpr bool isAvailable() { return true; } #else void prepare(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit"); } void run(PICAShader& shaderUnit) { - Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); + Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit"); } // Define dummy callback. This should never be called if the shader JIT is not supported diff --git a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp index 7411c430..9351f383 100644 --- a/include/PICA/dynapica/shader_rec_emitter_arm64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_arm64.hpp @@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; oaknut::Label log2Func, exp2Func; oaknut::Label emitLog2Func(); @@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer - ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {} + ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {} // PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer(instructionLabels.at(pc)); } diff --git a/include/PICA/dynapica/shader_rec_emitter_x64.hpp b/include/PICA/dynapica/shader_rec_emitter_x64.hpp index 0338911c..a43bd2dc 100644 --- a/include/PICA/dynapica/shader_rec_emitter_x64.hpp +++ b/include/PICA/dynapica/shader_rec_emitter_x64.hpp @@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator { Label negateVector; // Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i) Label onesVector; + // Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3 + Label dp3Vector; u32 recompilerPC = 0; // PC the recompiler is currently recompiling @ u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop) @@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator { // Shows whether the loaded shader has any log2 and exp2 instructions bool codeHasLog2 = false; bool codeHasExp2 = false; + // Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul + bool useSafeMUL = false; Xbyak::Label log2Func, exp2Func; Xbyak::Label emitLog2Func(); Xbyak::Label emitExp2Func(); Xbyak::util::Cpu cpuCaps; + // Emit a PICA200-compliant multiplication that handles "0 * inf = 0" + void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch); + // Compile all instructions from [current recompiler PC, end) void compileUntil(const PICAShader& shaderUnit, u32 endPC); // Compile instruction "instr" @@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator { PrologueCallback prologueCb = nullptr; // Initialize our emitter with "allocSize" bytes of RWX memory - ShaderEmitter() : Xbyak::CodeGenerator(allocSize) { + ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) { cpuCaps = Xbyak::util::Cpu(); haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41); diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 61020f76..c168a9bf 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include "PICA/draw_acceleration.hpp" #include "PICA/dynapica/shader_rec.hpp" #include "PICA/float_types.hpp" #include "PICA/pica_vertex.hpp" @@ -13,6 +14,12 @@ #include "memory.hpp" #include "renderer.hpp" +enum class ShaderExecMode { + Interpreter, // Interpret shaders on the CPU + JIT, // Recompile shaders to CPU machine code + Hardware, // Recompiler shaders to host shaders and run them on the GPU +}; + class GPU { static constexpr u32 regNum = 0x300; static constexpr u32 extRegNum = 0x1000; @@ -45,7 +52,7 @@ class GPU { uint immediateModeVertIndex; uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading - template + template void drawArrays(); // Silly method of avoiding linking problems. TODO: Change to something less silly @@ -81,6 +88,7 @@ class GPU { std::unique_ptr renderer; PICA::Vertex getImmediateModeVertex(); + void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed); public: // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT // Encoded in PICA native format @@ -92,6 +100,9 @@ class GPU { // Set to false by the renderer when the lighting_lut is uploaded ot the GPU bool lightingLUTDirty = false; + bool fogLUTDirty = false; + std::array fogLUT; + GPU(Memory& mem, EmulatorConfig& config); void display() { renderer->display(); } void screenshot(const std::string& name) { renderer->screenshot(name); } @@ -164,7 +175,8 @@ class GPU { u32 index = paddr - PhysicalAddrs::VRAM; return (T*)&vram[index]; } else [[unlikely]] { - Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr); + Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr); + return nullptr; } } diff --git a/include/PICA/pica_frag_config.hpp b/include/PICA/pica_frag_config.hpp new file mode 100644 index 00000000..7b63a7b5 --- /dev/null +++ b/include/PICA/pica_frag_config.hpp @@ -0,0 +1,257 @@ +#pragma once +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + struct OutputConfig { + union { + u32 raw{}; + // Merge the enable + compare function into 1 field to avoid duplicate shaders + // enable == off means a CompareFunction of Always + BitField<0, 3, CompareFunction> alphaTestFunction; + BitField<3, 1, u32> depthMapEnable; + BitField<4, 4, LogicOpMode> logicOpMode; + }; + }; + + struct TextureConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // There's 6 TEV stages, and each one is configured via 4 word-sized registers + // (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders) + std::array tevConfigs; + }; + + struct FogConfig { + union { + u32 raw{}; + + BitField<0, 3, FogMode> mode; + BitField<3, 1, u32> flipDepth; + }; + }; + + struct Light { + union { + u16 raw; + BitField<0, 3, u16> num; + BitField<3, 1, u16> directional; + BitField<4, 1, u16> twoSidedDiffuse; + BitField<5, 1, u16> distanceAttenuationEnable; + BitField<6, 1, u16> spotAttenuationEnable; + BitField<7, 1, u16> geometricFactor0; + BitField<8, 1, u16> geometricFactor1; + BitField<9, 1, u16> shadowEnable; + }; + }; + + struct LightingLUTConfig { + union { + u32 raw; + BitField<0, 1, u32> enable; + BitField<1, 1, u32> absInput; + BitField<2, 3, u32> type; + BitField<5, 3, u32> scale; + }; + }; + + struct LightingConfig { + union { + u32 raw{}; + BitField<0, 1, u32> enable; + BitField<1, 4, u32> lightNum; + BitField<5, 2, u32> bumpMode; + BitField<7, 2, u32> bumpSelector; + BitField<9, 1, u32> bumpRenorm; + BitField<10, 1, u32> clampHighlights; + BitField<11, 4, u32> config; + BitField<15, 1, u32> enablePrimaryAlpha; + BitField<16, 1, u32> enableSecondaryAlpha; + BitField<17, 1, u32> enableShadow; + BitField<18, 1, u32> shadowPrimary; + BitField<19, 1, u32> shadowSecondary; + BitField<20, 1, u32> shadowInvert; + BitField<21, 1, u32> shadowAlpha; + BitField<22, 2, u32> shadowSelector; + }; + + std::array luts{}; + + std::array lights{}; + + LightingConfig(const std::array& regs) { + // Ignore lighting registers if it's disabled + if ((regs[InternalRegs::LightingEnable] & 1) == 0) { + return; + } + + const u32 config0 = regs[InternalRegs::LightConfig0]; + const u32 config1 = regs[InternalRegs::LightConfig1]; + const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1; + + enable = 1; + lightNum = totalLightCount; + + enableShadow = Helpers::getBit<0>(config0); + if (enableShadow) [[unlikely]] { + shadowPrimary = Helpers::getBit<16>(config0); + shadowSecondary = Helpers::getBit<17>(config0); + shadowInvert = Helpers::getBit<18>(config0); + shadowAlpha = Helpers::getBit<19>(config0); + shadowSelector = Helpers::getBits<24, 2>(config0); + } + + enablePrimaryAlpha = Helpers::getBit<2>(config0); + enableSecondaryAlpha = Helpers::getBit<3>(config0); + config = Helpers::getBits<4, 4>(config0); + + bumpSelector = Helpers::getBits<22, 2>(config0); + clampHighlights = Helpers::getBit<27>(config0); + bumpMode = Helpers::getBits<28, 2>(config0); + bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor + + for (int i = 0; i < totalLightCount; i++) { + auto& light = lights[i]; + light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7; + + const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num]; + light.directional = Helpers::getBit<0>(lightConfig); + light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig); + light.geometricFactor0 = Helpers::getBit<2>(lightConfig); + light.geometricFactor1 = Helpers::getBit<3>(lightConfig); + + light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled + light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here + light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here + } + + LightingLUTConfig& d0 = luts[Lights::LUT_D0]; + LightingLUTConfig& d1 = luts[Lights::LUT_D1]; + LightingLUTConfig& sp = luts[spotlightLutIndex]; + LightingLUTConfig& fr = luts[Lights::LUT_FR]; + LightingLUTConfig& rb = luts[Lights::LUT_RB]; + LightingLUTConfig& rg = luts[Lights::LUT_RG]; + LightingLUTConfig& rr = luts[Lights::LUT_RR]; + + d0.enable = Helpers::getBit<16>(config1) == 0; + d1.enable = Helpers::getBit<17>(config1) == 0; + fr.enable = Helpers::getBit<19>(config1) == 0; + rb.enable = Helpers::getBit<20>(config1) == 0; + rg.enable = Helpers::getBit<21>(config1) == 0; + rr.enable = Helpers::getBit<22>(config1) == 0; + sp.enable = 1; + + const u32 lutAbs = regs[InternalRegs::LightLUTAbs]; + const u32 lutSelect = regs[InternalRegs::LightLUTSelect]; + const u32 lutScale = regs[InternalRegs::LightLUTScale]; + + if (d0.enable) { + d0.absInput = Helpers::getBit<1>(lutAbs) == 0; + d0.type = Helpers::getBits<0, 3>(lutSelect); + d0.scale = Helpers::getBits<0, 3>(lutScale); + } + + if (d1.enable) { + d1.absInput = Helpers::getBit<5>(lutAbs) == 0; + d1.type = Helpers::getBits<4, 3>(lutSelect); + d1.scale = Helpers::getBits<4, 3>(lutScale); + } + + sp.absInput = Helpers::getBit<9>(lutAbs) == 0; + sp.type = Helpers::getBits<8, 3>(lutSelect); + sp.scale = Helpers::getBits<8, 3>(lutScale); + + if (fr.enable) { + fr.absInput = Helpers::getBit<13>(lutAbs) == 0; + fr.type = Helpers::getBits<12, 3>(lutSelect); + fr.scale = Helpers::getBits<12, 3>(lutScale); + } + + if (rb.enable) { + rb.absInput = Helpers::getBit<17>(lutAbs) == 0; + rb.type = Helpers::getBits<16, 3>(lutSelect); + rb.scale = Helpers::getBits<16, 3>(lutScale); + } + + if (rg.enable) { + rg.absInput = Helpers::getBit<21>(lutAbs) == 0; + rg.type = Helpers::getBits<20, 3>(lutSelect); + rg.scale = Helpers::getBits<20, 3>(lutScale); + } + + if (rr.enable) { + rr.absInput = Helpers::getBit<25>(lutAbs) == 0; + rr.type = Helpers::getBits<24, 3>(lutSelect); + rr.scale = Helpers::getBits<24, 3>(lutScale); + } + } + }; + + // Config used for identifying unique fragment pipeline configurations + struct FragmentConfig { + OutputConfig outConfig; + TextureConfig texConfig; + FogConfig fogConfig; + LightingConfig lighting; + + bool operator==(const FragmentConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + + FragmentConfig(const std::array& regs) : lighting(regs) { + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + + outConfig.alphaTestFunction = + (alphaTestConfig & 1) ? static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; + + // Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead + const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0; + outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp])); + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + + // Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like + // {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO +#define setupTevStage(stage) \ + std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \ + texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4]; + + setupTevStage(0); + setupTevStage(1); + setupTevStage(2); + setupTevStage(3); + setupTevStage(4); + setupTevStage(5); +#undef setupTevStage + + fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]); + + if (fogConfig.mode == FogMode::Fog) { + fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]); + } + } + }; + + static_assert( + std::has_unique_object_representations() && std::has_unique_object_representations() && + std::has_unique_object_representations() && std::has_unique_object_representations() + ); +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp new file mode 100644 index 00000000..781fdcd3 --- /dev/null +++ b/include/PICA/pica_frag_uniforms.hpp @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include + +#include "helpers.hpp" + +namespace PICA { + struct LightUniform { + using vec3 = std::array; + + // std140 requires vec3s be aligned to 16 bytes + alignas(16) vec3 specular0; + alignas(16) vec3 specular1; + alignas(16) vec3 diffuse; + alignas(16) vec3 ambient; + alignas(16) vec3 position; + alignas(16) vec3 spotlightDirection; + + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + + struct FragmentUniforms { + using vec3 = std::array; + using vec4 = std::array; + static constexpr usize tevStageCount = 6; + + s32 alphaReference; + float depthScale; + float depthOffset; + + alignas(16) vec4 constantColors[tevStageCount]; + alignas(16) vec4 tevBufferColor; + alignas(16) vec4 clipCoords; + + // Note: We upload these as a u32 and decode on GPU. + // Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU. + u32 globalAmbientLight; + u32 fogColor; + // NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it + LightUniform lightUniforms[8]; + }; + + // Assert that lightUniforms is the last member of the structure + static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms)); +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/pica_simd.hpp b/include/PICA/pica_simd.hpp new file mode 100644 index 00000000..ae7d04eb --- /dev/null +++ b/include/PICA/pica_simd.hpp @@ -0,0 +1,274 @@ +#pragma once +#include +#include +#include + +#include "helpers.hpp" + +#if defined(_M_AMD64) || defined(__x86_64__) +#define PICA_SIMD_X64 +#include +#elif defined(_M_ARM64) || defined(__aarch64__) +#define PICA_SIMD_ARM64 +#include +#endif + +// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them) +namespace PICA::IndexBuffer { + // Non-SIMD, portable algorithm + template + std::pair analyzePortable(u8* indexBuffer, u32 vertexCount) { + u16 minimumIndex = std::numeric_limits::max(); + u16 maximumIndex = 0; + + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if constexpr (useShortIndices) { + u16* indexBuffer16 = reinterpret_cast(indexBuffer); + + for (u32 i = 0; i < vertexCount; i++) { + u16 index = indexBuffer16[i]; + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } else { + for (u32 i = 0; i < vertexCount; i++) { + u16 index = u16(indexBuffer[i]); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + } + + return {minimumIndex, maximumIndex}; + } + +#ifdef PICA_SIMD_ARM64 + template + std::pair analyzeNEON(u8* indexBuffer, u32 vertexCount) { + // We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices + constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16; + + if (vertexCount < vertsPerLoop) { + return analyzePortable(indexBuffer, vertexCount); + } + + u16 minimumIndex, maximumIndex; + + if constexpr (useShortIndices) { + // 16-bit indices + uint16x8_t minima = vdupq_n_u16(0xffff); + uint16x8_t maxima = vdupq_n_u16(0); + + while (vertexCount >= vertsPerLoop) { + const uint16x8_t data = vld1q_u16(reinterpret_cast(indexBuffer)); + minima = vminq_u16(data, minima); + maxima = vmaxq_u16(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + // Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD + // We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors + // uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima)); + // uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima)); + + uint16x8_t foldedMinima1 = vpminq_u16(minima, minima); + uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1); + uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2); + + uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima); + uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1); + uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2); + + minimumIndex = vgetq_lane_u16(foldedMinima3, 0); + maximumIndex = vgetq_lane_u16(foldedMaxima3, 0); + } else { + // 8-bit indices + uint8x16_t minima = vdupq_n_u8(0xff); + uint8x16_t maxima = vdupq_n_u8(0); + + while (vertexCount >= vertsPerLoop) { + uint8x16_t data = vld1q_u8(indexBuffer); + minima = vminq_u8(data, minima); + maxima = vmaxq_u8(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + // Do a similar horizontal min/max as in the u16 case, except now we're working uint8x16 instead of uint16x4 so we need 4 folds + uint8x16_t foldedMinima1 = vpminq_u8(minima, minima); + uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1); + uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2); + uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3); + + uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima); + uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1); + uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2); + uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3); + + minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0)); + maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0)); + } + + // If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + while (vertexCount > 0) { + if constexpr (useShortIndices) { + u16 index = *reinterpret_cast(indexBuffer); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + indexBuffer += 2; + } else { + u16 index = u16(*indexBuffer++); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + + vertexCount -= 1; + } + + return {minimumIndex, maximumIndex}; + } +#endif + +#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + template + std::pair analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) { + // We process 16 bytes per iteration, which is 8 vertices if we're using u16 + // indices or 16 vertices if we're using u8 indices + constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16; + + if (vertexCount < vertsPerLoop) { + return analyzePortable(indexBuffer, vertexCount); + } + + u16 minimumIndex, maximumIndex; + + if constexpr (useShortIndices) { + // Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers. + // Based on https://stackoverflow.com/a/22259607 + auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); }; + + auto horizontalMax16 = [](__m128i vector) -> u16 { + // We have an instruction to compute horizontal minimum but not maximum, so we use it. + // To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum + __m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu)); + u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped))); + return u16(min ^ 0xffff); + }; + + // 16-bit indices + // Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane) + // And the maxima vector to all 0s (0 for each 16-bit lane) + __m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu); + __m128i maxima = _mm_set_epi32(0, 0, 0, 0); + + while (vertexCount >= vertsPerLoop) { + const __m128i data = _mm_loadu_si128(reinterpret_cast(indexBuffer)); + minima = _mm_min_epu16(data, minima); + maxima = _mm_max_epu16(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + minimumIndex = u16(horizontalMin16(minima)); + maximumIndex = u16(horizontalMax16(maxima)); + } else { + // Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers. + // Based on https://stackoverflow.com/a/22259607 + auto horizontalMin8 = [](__m128i vector) -> u8 { + vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2))); + vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8)); + return u8(_mm_cvtsi128_si32(vector)); + }; + + auto horizontalMax8 = [](__m128i vector) -> u8 { + vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2))); + vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1))); + vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8)); + return u8(_mm_cvtsi128_si32(vector)); + }; + + // 8-bit indices + // Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane) + // And the maxima vector to all 0s (0 for each 8-bit lane) + __m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu); + __m128i maxima = _mm_set_epi32(0, 0, 0, 0); + + while (vertexCount >= vertsPerLoop) { + const __m128i data = _mm_loadu_si128(reinterpret_cast(indexBuffer)); + minima = _mm_min_epu8(data, minima); + maxima = _mm_max_epu8(data, maxima); + + indexBuffer += 16; + vertexCount -= vertsPerLoop; + } + + minimumIndex = u16(horizontalMin8(minima)); + maximumIndex = u16(horizontalMax8(maxima)); + } + + // If any indices could not be processed cause the buffer size + // is not 16-byte aligned, process them the naive way + // Calculate the minimum and maximum indices used in the index + // buffer, so we'll only upload them + while (vertexCount > 0) { + if constexpr (useShortIndices) { + u16 index = *reinterpret_cast(indexBuffer); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + indexBuffer += 2; + } else { + u16 index = u16(*indexBuffer++); + minimumIndex = std::min(minimumIndex, index); + maximumIndex = std::max(maximumIndex, index); + } + + vertexCount -= 1; + } + + return {minimumIndex, maximumIndex}; + } +#endif + + // Analyzes a PICA index buffer to get the minimum and maximum indices in the + // buffer, and returns them in a pair in the form [min, max]. Takes a template + // parameter to decide whether the indices in the buffer are u8 or u16 + template + std::pair analyze(u8* indexBuffer, u32 vertexCount) { +#if defined(PICA_SIMD_ARM64) + return analyzeNEON(indexBuffer, vertexCount); +#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__)) + // Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX + return analyzeSSE4_1(indexBuffer, vertexCount); +#else + return analyzePortable(indexBuffer, vertexCount); +#endif + } + + // In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex + // So we need to subtract the base vertex index from every index in the index buffer ourselves + // This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio + template + void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) { + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if constexpr (useShortIndices) { + u16* indexBuffer16 = reinterpret_cast(indexBuffer); + + for (u32 i = 0; i < indexCount; i++) { + indexBuffer16[i] -= baseIndex; + } + } else { + u8 baseIndex8 = u8(baseIndex); + + for (u32 i = 0; i < indexCount; i++) { + indexBuffer[i] -= baseIndex8; + } + } + } +} // namespace PICA::IndexBuffer diff --git a/include/PICA/pica_vert_config.hpp b/include/PICA/pica_vert_config.hpp new file mode 100644 index 00000000..4300e454 --- /dev/null +++ b/include/PICA/pica_vert_config.hpp @@ -0,0 +1,57 @@ +#pragma once +#include +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + // Configuration struct used + struct VertConfig { + PICAHash::HashType shaderHash; + PICAHash::HashType opdescHash; + u32 entrypoint; + + // PICA registers for configuring shader output->fragment semantic mapping + std::array outmaps{}; + u16 outputMask; + u8 outputCount; + bool usingUbershader; + + // Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup + // As the padding will get hashed and memcmp'd... + u32 pad{}; + + bool operator==(const VertConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(VertConfig)) == 0; + } + + VertConfig(PICAShader& shader, const std::array& regs, bool usingUbershader) : usingUbershader(usingUbershader) { + shaderHash = shader.getCodeHash(); + opdescHash = shader.getOpdescHash(); + entrypoint = shader.entrypoint; + + outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7; + outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask]; + for (int i = 0; i < outputCount; i++) { + // Mask out unused bits + outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F; + } + } + }; +} // namespace PICA + +static_assert(sizeof(PICA::VertConfig) == 56); + +// Override std::hash for our vertex config class +template <> +struct std::hash { + std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 4342ebe5..3185d350 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -51,6 +51,18 @@ namespace PICA { #undef defineTexEnv // clang-format on + // Fog registers + FogColor = 0xE1, + FogLUTIndex = 0xE6, + FogLUTData0 = 0xE8, + FogLUTData1 = 0xE9, + FogLUTData2 = 0xEA, + FogLUTData3 = 0xEB, + FogLUTData4 = 0xEC, + FogLUTData5 = 0xED, + FogLUTData6 = 0xEE, + FogLUTData7 = 0xEF, + // Framebuffer registers ColourOperation = 0x100, BlendFunc = 0x101, @@ -67,7 +79,29 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, - //LightingRegs + // Lighting registers + LightingEnable = 0x8F, + Light0Specular0 = 0x140, + Light0Specular1 = 0x141, + Light0Diffuse = 0x142, + Light0Ambient = 0x143, + Light0XY = 0x144, + Light0Z = 0x145, + Light0SpotlightXY = 0x146, + Light0SpotlightZ = 0x147, + Light0Config = 0x149, + Light0AttenuationBias = 0x14A, + Light0AttenuationScale = 0x14B, + + LightGlobalAmbient = 0x1C0, + LightNumber = 0x1C2, + LightConfig0 = 0x1C3, + LightConfig1 = 0x1C4, + LightPermutation = 0x1D9, + LightLUTAbs = 0x1D0, + LightLUTSelect = 0x1D1, + LightLUTScale = 0x1D2, + LightingLUTIndex = 0x01C5, LightingLUTData0 = 0x01C8, LightingLUTData1 = 0x01C9, @@ -231,7 +265,8 @@ namespace PICA { enum : u32 { LUT_D0 = 0, LUT_D1, - LUT_FR, + // LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders + LUT_FR = 0x3, LUT_RB, LUT_RG, LUT_RR, @@ -255,6 +290,11 @@ namespace PICA { }; } + // There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15) + // We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup + // This is particularly intuitive in several places, such as checking if a LUT is enabled + static constexpr int spotlightLutIndex = 2; + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, @@ -345,4 +385,156 @@ namespace PICA { GeometryPrimitive = 3, }; + enum class CompareFunction : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + Less = 4, + LessOrEqual = 5, + Greater = 6, + GreaterOrEqual = 7, + }; + + enum class LogicOpMode : u32 { + Clear = 0, + And = 1, + ReverseAnd = 2, + Copy = 3, + Set = 4, + InvertedCopy = 5, + Nop = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + InvertedAnd = 13, + ReverseOr = 14, + InvertedOr = 15, + }; + + enum class FogMode : u32 { + Disabled = 0, + Fog = 5, + Gas = 7, + }; + + struct TexEnvConfig { + enum class Source : u8 { + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // TODO: Inbetween values are unknown + PreviousBuffer = 0xD, + Constant = 0xE, + Previous = 0xF, + }; + + enum class ColorOperand : u8 { + SourceColor = 0x0, + OneMinusSourceColor = 0x1, + SourceAlpha = 0x2, + OneMinusSourceAlpha = 0x3, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, + // TODO: Inbetween values are unknown + SourceGreen = 0x8, + OneMinusSourceGreen = 0x9, + // Inbetween values are unknown + SourceBlue = 0xC, + OneMinusSourceBlue = 0xD, + }; + + enum class AlphaOperand : u8 { + SourceAlpha = 0x0, + OneMinusSourceAlpha = 0x1, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, + OneMinusSourceGreen = 0x5, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, + }; + + enum class Operation : u8 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3RGB = 6, + Dot3RGBA = 7, + MultiplyAdd = 8, + AddMultiply = 9, + }; + + // RGB sources + Source colorSource1, colorSource2, colorSource3; + // Alpha sources + Source alphaSource1, alphaSource2, alphaSource3; + + // RGB operands + ColorOperand colorOperand1, colorOperand2, colorOperand3; + // Alpha operands + AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3; + + // Texture environment operations for this stage + Operation colorOp, alphaOp; + + u32 constColor; + + private: + // These are the only private members since their value doesn't actually reflect the scale + // So we make them public so we'll always use the appropriate member functions instead + u8 colorScale; + u8 alphaScale; + + public: + // Create texture environment object from TEV registers + TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) { + colorSource1 = Helpers::getBits<0, 4, Source>(source); + colorSource2 = Helpers::getBits<4, 4, Source>(source); + colorSource3 = Helpers::getBits<8, 4, Source>(source); + + alphaSource1 = Helpers::getBits<16, 4, Source>(source); + alphaSource2 = Helpers::getBits<20, 4, Source>(source); + alphaSource3 = Helpers::getBits<24, 4, Source>(source); + + colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand); + colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand); + colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand); + + alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand); + alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand); + alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand); + + colorOp = Helpers::getBits<0, 4, Operation>(combiner); + alphaOp = Helpers::getBits<16, 4, Operation>(combiner); + + colorScale = Helpers::getBits<0, 2>(scale); + alphaScale = Helpers::getBits<16, 2>(scale); + } + + u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; } + u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; } + + bool isPassthroughStage() { + // clang-format off + // Thank you to the Citra dev that wrote this out + return ( + colorOp == Operation::Replace && alphaOp == Operation::Replace && + colorSource1 == Source::Previous && alphaSource1 == Source::Previous && + colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha && + getColorScale() == 1 && getAlphaScale() == 1 + ); + // clang-format on + } + }; } // namespace PICA diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 5b05e0b7..1040d2ff 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -1,6 +1,8 @@ #pragma once #include #include +#include +#include #include #include "PICA/float_types.hpp" @@ -21,7 +23,7 @@ namespace ShaderOpcodes { DST = 0x04, EX2 = 0x05, LG2 = 0x06, - LIT = 0x07, + LITP = 0x07, MUL = 0x08, SGE = 0x09, SLT = 0x0A, @@ -56,6 +58,10 @@ namespace ShaderOpcodes { }; } +namespace PICA::ShaderGen { + class ShaderDecompiler; +}; + // Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT class PICAShader { using f24 = Floats::f24; @@ -90,14 +96,22 @@ class PICAShader { public: // These are placed close to the temp registers and co because it helps the JIT generate better code u32 entrypoint = 0; // Initial shader PC - u32 boolUniform; - std::array, 4> intUniforms; + + // We want these registers in this order & with this alignment for uploading them directly to a UBO + // When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers. alignas(16) std::array floatUniforms; + alignas(16) std::array, 4> intUniforms; + u32 boolUniform; alignas(16) std::array fixedAttributes; // Fixed vertex attributes alignas(16) std::array inputs; // Attributes passed to the shader alignas(16) std::array outputs; alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT + + // We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT + // We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal + // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first + using Hash = PICAHash::HashType; protected: std::array operandDescriptors; @@ -116,20 +130,20 @@ class PICAShader { std::array callInfo; ShaderType type; - // We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT - // We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal - // Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first - using Hash = PICAHash::HashType; - Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism) Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT) + public: + bool uniformsDirty = false; + + protected: bool codeHashDirty = false; bool opdescHashDirty = false; // Add these as friend classes for the JIT so it has access to all important state friend class ShaderJIT; friend class ShaderEmitter; + friend class PICA::ShaderGen::ShaderDecompiler; vec4f getSource(u32 source); vec4f& getDest(u32 dest); @@ -151,6 +165,7 @@ class PICAShader { void jmpc(u32 instruction); void jmpu(u32 instruction); void lg2(u32 instruction); + void litp(u32 instruction); void loop(u32 instruction); void mad(u32 instruction); void madi(u32 instruction); @@ -220,13 +235,9 @@ class PICAShader { public: static constexpr size_t maxInstructionCount = 4096; std::array loadedShader; // Currently loaded & active shader - std::array bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to PICAShader(ShaderType type) : type(type) {} - // Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them - void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); } - void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; } void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; } @@ -235,7 +246,7 @@ class PICAShader { Helpers::panic("o no, shader upload overflew"); } - bufferedShader[bufferIndex++] = word; + loadedShader[bufferIndex++] = word; bufferIndex &= 0xfff; codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed @@ -256,14 +267,16 @@ class PICAShader { void uploadFloatUniform(u32 word) { floatUniformBuffer[floatUniformWordCount++] = word; - if (floatUniformIndex >= 96) { - Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex); - } if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) { - vec4f& uniform = floatUniforms[floatUniformIndex++]; floatUniformWordCount = 0; + // Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case + if (floatUniformIndex >= 96) [[unlikely]] { + return; + } + vec4f& uniform = floatUniforms[floatUniformIndex++]; + if (f32UniformTransfer) { uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]); uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]); @@ -275,6 +288,7 @@ class PICAShader { uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16)); uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8); } + uniformsDirty = true; } } @@ -286,6 +300,12 @@ class PICAShader { u[1] = getBits<8, 8>(word); u[2] = getBits<16, 8>(word); u[3] = getBits<24, 8>(word); + uniformsDirty = true; + } + + void uploadBoolUniform(u32 value) { + boolUniform = value; + uniformsDirty = true; } void run(); @@ -293,4 +313,13 @@ class PICAShader { Hash getCodeHash(); Hash getOpdescHash(); -}; \ No newline at end of file + + // Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU. + static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); } + void* getUniformPointer() { return static_cast(&floatUniforms); } +}; + +static_assert( + offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 && + offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4 +); \ No newline at end of file diff --git a/include/PICA/shader_decompiler.hpp b/include/PICA/shader_decompiler.hpp new file mode 100644 index 00000000..4a5cdc13 --- /dev/null +++ b/include/PICA/shader_decompiler.hpp @@ -0,0 +1,131 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +#include "PICA/shader.hpp" +#include "PICA/shader_gen_types.hpp" + +struct EmulatorConfig; + +namespace PICA::ShaderGen { + // Control flow analysis is partially based on + // https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33 + struct ControlFlow { + // A continuous range of addresses + struct AddressRange { + u32 start, end; + AddressRange(u32 start, u32 end) : start(start), end(end) {} + + // Use lexicographic comparison for functions in order to sort them in a set + bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); } + }; + + struct Function { + using Labels = std::set; + + enum class ExitMode { + Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans) + AlwaysReturn, // All paths reach the return point. + Conditional, // One or more code paths reach the return point or an END instruction conditionally. + AlwaysEnd, // All paths reach an END instruction. + }; + + u32 start; // Starting PC of the function + u32 end; // End PC of the function + Labels outLabels{}; // Labels this function can "goto" (jump) to + ExitMode exitMode = ExitMode::Unknown; + + explicit Function(u32 start, u32 end) : start(start), end(end) {} + bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); } + + std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); } + // To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end + // instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called + // from within functions deep in the callstack + std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); } + std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); } + }; + + std::set functions{}; + std::map exitMap{}; + + // Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation + // On the CPU + bool analysisFailed = false; + + // This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions + const Function* addFunction(const PICAShader& shader, u32 start, u32 end) { + auto searchIterator = functions.find(Function(start, end)); + if (searchIterator != functions.end()) { + return &(*searchIterator); + } + + // Add this function and analyze it if it doesn't already exist + Function function(start, end); + function.exitMode = analyzeFunction(shader, start, end, function.outLabels); + + // This function could not be fully analyzed, report failure + if (function.exitMode == Function::ExitMode::Unknown) { + analysisFailed = true; + return nullptr; + } + + // Add function to our function list + auto [it, added] = functions.insert(std::move(function)); + return &(*it); + } + + void analyze(const PICAShader& shader, u32 entrypoint); + Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels); + }; + + class ShaderDecompiler { + using AddressRange = ControlFlow::AddressRange; + using Function = ControlFlow::Function; + + ControlFlow controlFlow{}; + + PICAShader& shader; + EmulatorConfig& config; + std::string decompiledShader; + + u32 entrypoint; + + API api; + Language language; + bool compilationError = false; + + void compileInstruction(u32& pc, bool& finished); + // Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction) + std::pair compileRange(const AddressRange& range); + void callFunction(const Function& function); + const Function* findFunction(const AddressRange& range); + + void writeAttributes(); + + std::string getSource(u32 source, u32 index) const; + std::string getDest(u32 dest) const; + std::string getSwizzlePattern(u32 swizzle) const; + std::string getDestSwizzle(u32 destinationMask) const; + const char* getCondition(u32 cond, u32 refX, u32 refY); + + void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value); + // Returns if the instruction uses the typical register encodings most instructions use + // With some exceptions like MAD/MADI, and the control flow instructions which are completely different + bool usesCommonEncoding(u32 instruction) const; + + public: + ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) + : shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {} + + std::string decompile(); + }; + + std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language); +} // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp new file mode 100644 index 00000000..b6751e05 --- /dev/null +++ b/include/PICA/shader_gen.hpp @@ -0,0 +1,43 @@ +#pragma once +#include + +#include "PICA/gpu.hpp" +#include "PICA/pica_frag_config.hpp" +#include "PICA/pica_vert_config.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_gen_types.hpp" +#include "helpers.hpp" + +namespace PICA::ShaderGen { + class FragmentGenerator { + API api; + Language language; + + void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config); + void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + + void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config); + void compileLights(std::string& shader, const PICA::FragmentConfig& config); + void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID); + bool isSamplerEnabled(u32 environmentID, u32 lutID); + + void compileFog(std::string& shader, const PICA::FragmentConfig& config); + void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config); + + public: + FragmentGenerator(API api, Language language) : api(api), language(language) {} + std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr); + std::string getDefaultVertexShader(); + // For when PICA shader is acceleration is enabled. Turn the PICA shader source into a proper vertex shader + std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader); + + void setTarget(API api, Language language) { + this->api = api; + this->language = language; + } + }; +}; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/PICA/shader_gen_types.hpp b/include/PICA/shader_gen_types.hpp new file mode 100644 index 00000000..1877227f --- /dev/null +++ b/include/PICA/shader_gen_types.hpp @@ -0,0 +1,9 @@ +#pragma once + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; +} // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/PICA/shader_unit.hpp b/include/PICA/shader_unit.hpp index d8d93160..80e01346 100644 --- a/include/PICA/shader_unit.hpp +++ b/include/PICA/shader_unit.hpp @@ -2,10 +2,9 @@ #include "PICA/shader.hpp" class ShaderUnit { - -public: - PICAShader vs; // Vertex shader - PICAShader gs; // Geometry shader + public: + PICAShader vs; // Vertex shader + PICAShader gs; // Geometry shader ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {} void reset(); diff --git a/include/align.hpp b/include/align.hpp new file mode 100644 index 00000000..2f9a33db --- /dev/null +++ b/include/align.hpp @@ -0,0 +1,99 @@ +// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include + +#include "helpers.hpp" + +#ifdef _WIN32 +#include +#endif + +namespace Common { + template + constexpr bool isAligned(T value, unsigned int alignment) { + return (value % static_cast(alignment)) == 0; + } + + template + constexpr T alignUp(T value, unsigned int alignment) { + return (value + static_cast(alignment - 1)) / static_cast(alignment) * static_cast(alignment); + } + + template + constexpr T alignDown(T value, unsigned int alignment) { + return value / static_cast(alignment) * static_cast(alignment); + } + + template + constexpr bool isAlignedPow2(T value, unsigned int alignment) { + return (value & static_cast(alignment - 1)) == 0; + } + + template + constexpr T alignUpPow2(T value, unsigned int alignment) { + return (value + static_cast(alignment - 1)) & static_cast(~static_cast(alignment - 1)); + } + + template + constexpr T alignDownPow2(T value, unsigned int alignment) { + return value & static_cast(~static_cast(alignment - 1)); + } + + template + constexpr bool isPow2(T value) { + return (value & (value - 1)) == 0; + } + + template + constexpr T previousPow2(T value) { + if (value == static_cast(0)) return 0; + + value |= (value >> 1); + value |= (value >> 2); + value |= (value >> 4); + if constexpr (sizeof(T) >= 16) value |= (value >> 8); + if constexpr (sizeof(T) >= 32) value |= (value >> 16); + if constexpr (sizeof(T) >= 64) value |= (value >> 32); + return value - (value >> 1); + } + + template + constexpr T nextPow2(T value) { + // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + if (value == static_cast(0)) return 0; + + value--; + value |= (value >> 1); + value |= (value >> 2); + value |= (value >> 4); + if constexpr (sizeof(T) >= 16) value |= (value >> 8); + if constexpr (sizeof(T) >= 32) value |= (value >> 16); + if constexpr (sizeof(T) >= 64) value |= (value >> 32); + value++; + return value; + } + + ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) { +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + // Unaligned sizes are slow on macOS. +#ifdef __APPLE__ + if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1); +#endif + void* ret = nullptr; + return (posix_memalign(&ret, alignment, size) == 0) ? ret : nullptr; +#endif + } + + ALWAYS_INLINE static void alignedFree(void* ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif + } +} // namespace Common diff --git a/include/audio/aac.hpp b/include/audio/aac.hpp new file mode 100644 index 00000000..389ecc04 --- /dev/null +++ b/include/audio/aac.hpp @@ -0,0 +1,82 @@ +#pragma once +#include +#include + +#include "helpers.hpp" +#include "swap.hpp" + +namespace Audio::AAC { + namespace ResultCode { + enum : u32 { + Success = 0, + }; + } + + // Enum values and struct definitions based off Citra + namespace Command { + enum : u16 { + Init = 0, // Initialize encoder/decoder + EncodeDecode = 1, // Encode/Decode AAC + Shutdown = 2, // Shutdown encoder/decoder + LoadState = 3, + SaveState = 4, + }; + } + + namespace SampleRate { + enum : u32 { + Rate48000 = 0, + Rate44100 = 1, + Rate32000 = 2, + Rate24000 = 3, + Rate22050 = 4, + Rate16000 = 5, + Rate12000 = 6, + Rate11025 = 7, + Rate8000 = 8, + }; + } + + namespace Mode { + enum : u16 { + None = 0, + Decode = 1, + Encode = 2, + }; + } + + struct DecodeResponse { + u32_le sampleRate; + u32_le channelCount; + u32_le size; + u32_le unknown1; + u32_le unknown2; + u32_le sampleCount; + }; + + struct DecodeRequest { + u32_le address; // Address of input AAC stream + u32_le size; // Size of input AAC stream + u32_le destAddrLeft; // Output address for left channel samples + u32_le destAddrRight; // Output address for right channel samples + u32_le unknown1; + u32_le unknown2; + }; + + struct Message { + u16_le mode = Mode::None; // Encode or decode AAC? + u16_le command = Command::Init; + u32_le resultCode = ResultCode::Success; + + // Info on the AAC request + union { + std::array commandData{}; + + DecodeResponse decodeResponse; + DecodeRequest decodeRequest; + }; + }; + + static_assert(sizeof(Message) == 32); + static_assert(std::is_trivially_copyable()); +} // namespace Audio::AAC diff --git a/include/audio/aac_decoder.hpp b/include/audio/aac_decoder.hpp new file mode 100644 index 00000000..6538bce9 --- /dev/null +++ b/include/audio/aac_decoder.hpp @@ -0,0 +1,24 @@ +#pragma once +#include + +#include "audio/aac.hpp" +#include "helpers.hpp" + +struct AAC_DECODER_INSTANCE; + +namespace Audio::AAC { + class Decoder { + using DecoderHandle = AAC_DECODER_INSTANCE*; + using PaddrCallback = std::function; + + DecoderHandle decoderHandle = nullptr; + + bool isInitialized() { return decoderHandle != nullptr; } + void initialize(); + + public: + // Decode function. Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions + void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback); + ~Decoder(); + }; +} // namespace Audio::AAC \ No newline at end of file diff --git a/include/audio/dsp_core.hpp b/include/audio/dsp_core.hpp index a4fb1ab1..5addfd19 100644 --- a/include/audio/dsp_core.hpp +++ b/include/audio/dsp_core.hpp @@ -43,7 +43,7 @@ namespace Audio { virtual ~DSPCore() {} virtual void reset() = 0; - virtual void runAudioFrame() = 0; + virtual void runAudioFrame(u64 eventTimestamp) = 0; virtual u8* getDspMemory() = 0; virtual u16 recvData(u32 regId) = 0; diff --git a/include/audio/dsp_shared_mem.hpp b/include/audio/dsp_shared_mem.hpp index 148986f9..272edf7e 100644 --- a/include/audio/dsp_shared_mem.hpp +++ b/include/audio/dsp_shared_mem.hpp @@ -294,12 +294,12 @@ namespace Audio::HLE { struct SourceStatus { struct Status { - u8 isEnabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) + u8 enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) u8 currentBufferIDDirty; ///< Non-zero when current_buffer_id changes u16_le syncCount; ///< Is set by the DSP to the value of SourceConfiguration::sync_count - u32_dsp bufferPosition; ///< Number of samples into the current buffer + u32_dsp samplePosition; ///< Number of samples into the current buffer u16_le currentBufferID; ///< Updated when a buffer finishes playing - u16_le lastBufferID; ///< Updated when all buffers in the queue finish playing + u16_le previousBufferID; ///< Updated when all buffers in the queue finish playing }; Status status[sourceCount]; @@ -324,8 +324,8 @@ namespace Audio::HLE { BitField<15, 1, u32> outputBufferCountDirty; BitField<16, 1, u32> masterVolumeDirty; - BitField<24, 1, u32> auxReturnVolume0Dirty; - BitField<25, 1, u32> auxReturnVolume1Dirty; + BitField<24, 1, u32> auxVolume0Dirty; + BitField<25, 1, u32> auxVolume1Dirty; BitField<26, 1, u32> outputFormatDirty; BitField<27, 1, u32> clippingModeDirty; BitField<28, 1, u32> headphonesConnectedDirty; @@ -337,7 +337,7 @@ namespace Audio::HLE { /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for /// each at the final mixer. float_le masterVolume; - std::array auxReturnVolume; + std::array auxVolumes; u16_le outputBufferCount; u16 pad1[2]; @@ -422,7 +422,7 @@ namespace Audio::HLE { struct DspStatus { u16_le unknown; - u16_le dropped_frames; + u16_le droppedFrames; u16 pad0[0xE]; }; ASSERT_DSP_STRUCT(DspStatus, 32); diff --git a/include/audio/hle_core.hpp b/include/audio/hle_core.hpp index 257ab5ac..bd717237 100644 --- a/include/audio/hle_core.hpp +++ b/include/audio/hle_core.hpp @@ -2,9 +2,12 @@ #include #include #include +#include #include #include +#include "audio/aac.hpp" +#include "audio/aac_decoder.hpp" #include "audio/dsp_core.hpp" #include "audio/dsp_shared_mem.hpp" #include "memory.hpp" @@ -41,15 +44,25 @@ namespace Audio { return this->bufferID > other.bufferID; } }; + // Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque? using SampleBuffer = std::deque>; using BufferQueue = std::priority_queue; BufferQueue buffers; + SampleFormat sampleFormat = SampleFormat::ADPCM; + SourceType sourceType = SourceType::Stereo; + std::array gain0, gain1, gain2; + u32 samplePosition; // Sample number into the current audio buffer + float rateMultiplier; u16 syncCount; - bool enabled; // Is the source enabled? + u16 currentBufferID; + u16 previousBufferID; + + bool enabled; // Is the source enabled? + bool isBufferIDDirty = false; // Did we change buffers? // ADPCM decoding info: // An array of fixed point S5.11 coefficients. These provide "weights" for the history samples @@ -65,6 +78,10 @@ namespace Audio { int index = 0; // Index of the voice in [0, 23] for debugging void reset(); + + // Push a buffer to the buffer queue + void pushBuffer(const Buffer& buffer) { buffers.push(buffer); } + // Pop a buffer from the buffer queue and return it Buffer popBuffer() { assert(!buffers.empty()); @@ -78,8 +95,7 @@ namespace Audio { DSPSource() { reset(); } }; - class HLE_DSP : public DSPCore { - // The audio frame types are public in case we want to use them for unit tests + class DSPMixer { public: template using Sample = std::array; @@ -96,6 +112,43 @@ namespace Audio { template using QuadFrame = Frame; + private: + using ChannelFormat = HLE::DspConfiguration::OutputFormat; + // The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages + // Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU + static constexpr usize mixerStageCount = 3; + + public: + ChannelFormat channelFormat = ChannelFormat::Stereo; + std::array volumes; + std::array enableAuxStages; + + void reset() { + channelFormat = ChannelFormat::Stereo; + + volumes.fill(0.0); + enableAuxStages.fill(false); + } + }; + + class HLE_DSP : public DSPCore { + // The audio frame types are public in case we want to use them for unit tests + public: + template + using Sample = DSPMixer::Sample; + + template + using Frame = DSPMixer::Frame; + + template + using MonoFrame = DSPMixer::MonoFrame; + + template + using StereoFrame = DSPMixer::StereoFrame; + + template + using QuadFrame = DSPMixer::QuadFrame; + using Source = Audio::DSPSource; using SampleBuffer = Source::SampleBuffer; @@ -114,8 +167,8 @@ namespace Audio { std::array sources; // DSP voices Audio::HLE::DspMemory dspRam; - SampleFormat sampleFormat = SampleFormat::ADPCM; - SourceType sourceType = SourceType::Stereo; + Audio::DSPMixer mixer; + std::unique_ptr aacDecoder; void resetAudioPipe(); bool loaded = false; // Have we loaded a component? @@ -132,7 +185,7 @@ namespace Audio { } else if (counter1 == 0xffff && counter0 != 0xfffe) { return 0; } else { - return counter0 > counter1 ? 0 : 0; + return (counter0 > counter1) ? 0 : 1; } } @@ -157,11 +210,20 @@ namespace Audio { } } + void handleAACRequest(const AAC::Message& request); void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients); + void updateMixerConfig(HLE::SharedMemory& sharedMem); void generateFrame(StereoFrame& frame); + void generateFrame(DSPSource& source); void outputFrame(); + // Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame + void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion); + // Decode an entire buffer worth of audio void decodeBuffer(DSPSource& source); + + SampleBuffer decodePCM8(const u8* data, usize sampleCount, Source& source); + SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source); SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source); public: @@ -169,7 +231,7 @@ namespace Audio { ~HLE_DSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.rawMemory.data(); } diff --git a/include/audio/null_core.hpp b/include/audio/null_core.hpp index 7d6f1c9e..bedec8d3 100644 --- a/include/audio/null_core.hpp +++ b/include/audio/null_core.hpp @@ -27,7 +27,7 @@ namespace Audio { ~NullDSP() override {} void reset() override; - void runAudioFrame() override; + void runAudioFrame(u64 eventTimestamp) override; u8* getDspMemory() override { return dspRam.data(); } diff --git a/include/audio/teakra_core.hpp b/include/audio/teakra_core.hpp index 6a011231..17104985 100644 --- a/include/audio/teakra_core.hpp +++ b/include/audio/teakra_core.hpp @@ -83,7 +83,7 @@ namespace Audio { void reset() override; // Run 1 slice of DSP instructions and schedule the next audio frame - void runAudioFrame() override { + void runAudioFrame(u64 eventTimestamp) override { runSlice(); scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2); } diff --git a/include/config.hpp b/include/config.hpp index 2333c682..81a03385 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -7,16 +7,33 @@ // Remember to initialize every field here to its default value otherwise bad things will happen struct EmulatorConfig { // Only enable the shader JIT by default on platforms where it's completely tested -#ifdef PANDA3DS_X64_HOST +#if defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST) static constexpr bool shaderJitDefault = true; #else static constexpr bool shaderJitDefault = false; #endif + // For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are + // horrible. On other platforms we default to ubershader + shadergen fallback for lights +#if defined(__ANDROID__) || defined(__APPLE__) + static constexpr bool ubershaderDefault = false; +#else + static constexpr bool ubershaderDefault = true; +#endif + static constexpr bool accelerateShadersDefault = true; + bool shaderJitEnabled = shaderJitDefault; + bool useUbershaders = ubershaderDefault; + bool accelerateShaders = accelerateShadersDefault; + bool accurateShaderMul = false; bool discordRpcEnabled = false; + + // Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance + bool forceShadergenForLights = true; + int lightShadergenThreshold = 1; + RendererType rendererType = RendererType::OpenGL; - Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; + Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE; bool sdCardInserted = true; bool sdWriteProtected = false; @@ -25,6 +42,9 @@ struct EmulatorConfig { bool audioEnabled = false; bool vsyncEnabled = true; + bool enableRenderdoc = false; + bool printAppVersion = true; + bool chargerPlugged = true; // Default to 3% battery to make users suffer int batteryPercentage = 3; @@ -33,7 +53,25 @@ struct EmulatorConfig { std::filesystem::path defaultRomPath = ""; std::filesystem::path filePath; + // Frontend window settings + struct WindowSettings { + static constexpr int defaultX = 200; + static constexpr int defaultY = 200; + static constexpr int defaultWidth = 800; + static constexpr int defaultHeight = 240 * 2; + + bool rememberPosition = false; // Remember window position & size + bool showAppVersion = false; + + int x = defaultX; + int y = defaultY; + int width = defaultHeight; + int height = defaultHeight; + }; + + WindowSettings windowSettings; + EmulatorConfig(const std::filesystem::path& path); void load(); void save(); -}; \ No newline at end of file +}; diff --git a/include/crypto/aes_engine.hpp b/include/crypto/aes_engine.hpp index 324f4adf..c96b36d3 100644 --- a/include/crypto/aes_engine.hpp +++ b/include/crypto/aes_engine.hpp @@ -1,20 +1,29 @@ #pragma once #include -#include -#include #include +#include +#include #include #include +#include #include "helpers.hpp" +#include "io_file.hpp" +#include "swap.hpp" namespace Crypto { - constexpr std::size_t AesKeySize = 0x10; + constexpr usize AesKeySize = 0x10; using AESKey = std::array; - template - static std::array rolArray(const std::array& value, std::size_t bits) { + struct Seed { + u64_le titleID; + AESKey seed; + std::array pad; + }; + + template + static std::array rolArray(const std::array& value, usize bits) { const auto bitWidth = N * CHAR_BIT; bits %= bitWidth; @@ -24,18 +33,18 @@ namespace Crypto { std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX; } return result; } - template + template static std::array addArray(const std::array& a, const std::array& b) { std::array result; - std::size_t sum = 0; - std::size_t carry = 0; + usize sum = 0; + usize carry = 0; for (std::int64_t i = N - 1; i >= 0; i--) { sum = a[i] + b[i] + carry; @@ -46,11 +55,11 @@ namespace Crypto { return result; } - template + template static std::array xorArray(const std::array& a, const std::array& b) { std::array result; - for (std::size_t i = 0; i < N; i++) { + for (usize i = 0; i < N; i++) { result[i] = a[i] ^ b[i]; } @@ -63,7 +72,7 @@ namespace Crypto { } AESKey rawKey; - for (std::size_t i = 0; i < rawKey.size(); i++) { + for (usize i = 0; i < rawKey.size(); i++) { rawKey[i] = static_cast(std::stoi(hex.substr(i * 2, 2), 0, 16)); } @@ -76,7 +85,7 @@ namespace Crypto { std::optional normalKey = std::nullopt; }; - enum KeySlotId : std::size_t { + enum KeySlotId : usize { NCCHKey0 = 0x2C, NCCHKey1 = 0x25, NCCHKey2 = 0x18, @@ -84,14 +93,17 @@ namespace Crypto { }; class AESEngine { - private: - constexpr static std::size_t AesKeySlotCount = 0x40; + private: + constexpr static usize AesKeySlotCount = 0x40; std::optional m_generator = std::nullopt; std::array m_slots; bool keysLoaded = false; - constexpr void updateNormalKey(std::size_t slotId) { + std::vector seeds; + IOFile seedDatabase; + + constexpr void updateNormalKey(usize slotId) { if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) { auto& keySlot = m_slots.at(slotId); AESKey keyX = keySlot.keyX.value(); @@ -101,13 +113,17 @@ namespace Crypto { } } - public: + public: AESEngine() {} void loadKeys(const std::filesystem::path& path); + void setSeedPath(const std::filesystem::path& path); + // Returns true on success, false on failure + bool loadSeeds(); + bool haveKeys() { return keysLoaded; } bool haveGenerator() { return m_generator.has_value(); } - constexpr bool hasKeyX(std::size_t slotId) { + constexpr bool hasKeyX(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -115,18 +131,16 @@ namespace Crypto { return m_slots.at(slotId).keyX.has_value(); } - constexpr AESKey getKeyX(std::size_t slotId) { - return m_slots.at(slotId).keyX.value_or(AESKey{}); - } + constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); } - constexpr void setKeyX(std::size_t slotId, const AESKey &key) { + constexpr void setKeyX(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyX = key; updateNormalKey(slotId); } } - constexpr bool hasKeyY(std::size_t slotId) { + constexpr bool hasKeyY(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -134,18 +148,16 @@ namespace Crypto { return m_slots.at(slotId).keyY.has_value(); } - constexpr AESKey getKeyY(std::size_t slotId) { - return m_slots.at(slotId).keyY.value_or(AESKey{}); - } + constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); } - constexpr void setKeyY(std::size_t slotId, const AESKey &key) { + constexpr void setKeyY(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).keyY = key; updateNormalKey(slotId); } } - constexpr bool hasNormalKey(std::size_t slotId) { + constexpr bool hasNormalKey(usize slotId) { if (slotId >= AesKeySlotCount) { return false; } @@ -153,14 +165,14 @@ namespace Crypto { return m_slots.at(slotId).normalKey.has_value(); } - constexpr AESKey getNormalKey(std::size_t slotId) { - return m_slots.at(slotId).normalKey.value_or(AESKey{}); - } + constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); } - constexpr void setNormalKey(std::size_t slotId, const AESKey &key) { + constexpr void setNormalKey(usize slotId, const AESKey& key) { if (slotId < AesKeySlotCount) { m_slots.at(slotId).normalKey = key; } } + + std::optional getSeedFromDB(u64 titleID); }; -} \ No newline at end of file +} // namespace Crypto diff --git a/include/emulator.hpp b/include/emulator.hpp index de04648e..a668d6c1 100644 --- a/include/emulator.hpp +++ b/include/emulator.hpp @@ -135,4 +135,7 @@ class Emulator { std::filesystem::path getAppDataRoot(); std::span getSMDH(); + + private: + void loadRenderdoc(); }; diff --git a/include/kernel/handles.hpp b/include/kernel/handles.hpp index fe746b65..45400837 100644 --- a/include/kernel/handles.hpp +++ b/include/kernel/handles.hpp @@ -1,7 +1,7 @@ #pragma once #include "helpers.hpp" -using Handle = u32; +using HorizonHandle = u32; namespace KernelHandles { enum : u32 { @@ -61,17 +61,17 @@ namespace KernelHandles { }; // Returns whether "handle" belongs to one of the OS services - static constexpr bool isServiceHandle(Handle handle) { + static constexpr bool isServiceHandle(HorizonHandle handle) { return handle >= MinServiceHandle && handle <= MaxServiceHandle; } // Returns whether "handle" belongs to one of the OS services' shared memory areas - static constexpr bool isSharedMemHandle(Handle handle) { + static constexpr bool isSharedMemHandle(HorizonHandle handle) { return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle; } // Returns the name of a handle as a string based on the given handle - static const char* getServiceName(Handle handle) { + static const char* getServiceName(HorizonHandle handle) { switch (handle) { case AC: return "AC"; case ACT: return "ACT"; diff --git a/include/kernel/kernel.hpp b/include/kernel/kernel.hpp index 6b6d28e1..c7af773d 100644 --- a/include/kernel/kernel.hpp +++ b/include/kernel/kernel.hpp @@ -16,8 +16,11 @@ #include "services/service_manager.hpp" class CPU; +struct Scheduler; class Kernel { + using Handle = HorizonHandle; + std::span regs; CPU& cpu; Memory& mem; @@ -247,6 +250,7 @@ public: ServiceManager& getServiceManager() { return serviceManager; } KFcram& getFcramManager() { return fcramManager; } + Scheduler& getScheduler(); void sendGPUInterrupt(GPUInterrupt type) { serviceManager.sendGPUInterrupt(type); } void clearInstructionCache(); diff --git a/include/kernel/kernel_types.hpp b/include/kernel/kernel_types.hpp index 7c1e5a45..b48a701c 100644 --- a/include/kernel/kernel_types.hpp +++ b/include/kernel/kernel_types.hpp @@ -53,7 +53,7 @@ enum class FcramRegion { struct AddressArbiter {}; struct ResourceLimits { - Handle handle; + HorizonHandle handle; s32 currentCommit = 0; }; @@ -97,6 +97,8 @@ struct Port { }; struct Session { + using Handle = HorizonHandle; + Handle portHandle; // The port this session is subscribed to Session(Handle portHandle) : portHandle(portHandle) {} }; @@ -115,6 +117,8 @@ enum class ThreadStatus { }; struct Thread { + using Handle = HorizonHandle; + u32 initialSP; // Initial r13 value u32 entrypoint; // Initial r15 value u32 priority; @@ -167,6 +171,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) { } struct Mutex { + using Handle = HorizonHandle; + u64 waitlist; // Refer to the getWaitlist function below for documentation Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked Handle handle; // Handle of the mutex itself @@ -209,6 +215,8 @@ struct MemoryBlock { // Generic kernel object class struct KernelObject { + using Handle = HorizonHandle; + Handle handle = 0; // A u32 the OS will use to identify objects void* data = nullptr; KernelObjectType type; diff --git a/include/loader/ncch.hpp b/include/loader/ncch.hpp index 42ce1590..92ad5040 100644 --- a/include/loader/ncch.hpp +++ b/include/loader/ncch.hpp @@ -50,6 +50,7 @@ struct NCCH { static constexpr u64 mediaUnit = 0x200; u64 size = 0; // Size of NCCH converted to bytes + u64 saveDataSize = 0; u32 stackSize = 0; u32 bssSize = 0; u32 exheaderSize = 0; @@ -60,10 +61,10 @@ struct NCCH { CodeSetInfo text, data, rodata; FSInfo partitionInfo; + std::optional primaryKey, secondaryKey; + // Contents of the .code file in the ExeFS std::vector codeFile; - // Contains of the cart's save data - std::vector saveData; // The cart region. Only the CXI's region matters to us. Necessary to get past region locking std::optional region = std::nullopt; std::vector smdh; @@ -76,7 +77,7 @@ struct NCCH { bool hasExeFS() { return exeFS.size != 0; } bool hasRomFS() { return romFS.size != 0; } bool hasCode() { return codeFile.size() != 0; } - bool hasSaveData() { return saveData.size() != 0; } + bool hasSaveData() { return saveDataSize != 0; } // Parse SMDH for region info and such. Returns false on failure, true on success bool parseSMDH(const std::vector &smdh); diff --git a/include/memory.hpp b/include/memory.hpp index a9af0c6b..6220af4d 100644 --- a/include/memory.hpp +++ b/include/memory.hpp @@ -114,6 +114,7 @@ class Memory { bool changeState = false; bool changePerms = false; }; + using Handle = HorizonHandle; u8* fcram; u8* dspRam; // Provided to us by Audio @@ -222,8 +223,14 @@ private: u8* getFCRAM() { return fcram; } enum class BatteryLevel { - Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars + Empty = 0, + AlmostEmpty, + OneBar, + TwoBars, + ThreeBars, + FourBars, }; + u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) { u8 value = static_cast(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5 if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected @@ -287,5 +294,5 @@ private: bool allocateMainThreadStack(u32 size); Regions getConsoleRegion(); - void copySharedFont(u8* ptr); + void copySharedFont(u8* ptr, u32 vaddr); }; diff --git a/include/panda_qt/cheats_window.hpp b/include/panda_qt/cheats_window.hpp index c82b2bd8..93228d5e 100644 --- a/include/panda_qt/cheats_window.hpp +++ b/include/panda_qt/cheats_window.hpp @@ -1,6 +1,13 @@ #pragma once #include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget { std::filesystem::path cheatPath; Emulator* emu; }; + +struct CheatMetadata { + u32 handle = Cheats::badCheatHandle; + std::string name = "New cheat"; + std::string code; + bool enabled = true; +}; + +class CheatEntryWidget : public QWidget { + Q_OBJECT + + public: + CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent); + + void Update() { + name->setText(metadata.name.c_str()); + enabled->setChecked(metadata.enabled); + update(); + } + + void Remove() { + emu->getCheats().removeCheat(metadata.handle); + cheatList->takeItem(cheatList->row(listItem)); + deleteLater(); + } + + const CheatMetadata& getMetadata() { return metadata; } + void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; } + + private: + void checkboxChanged(int state); + void editClicked(); + + Emulator* emu; + CheatMetadata metadata; + u32 handle; + QLabel* name; + QCheckBox* enabled; + QListWidget* cheatList; + QListWidgetItem* listItem; +}; + +class CheatEditDialog : public QDialog { + Q_OBJECT + + public: + CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry); + + void accepted(); + void rejected(); + + private: + Emulator* emu; + CheatEntryWidget& cheatEntry; + QTextEdit* codeEdit; + QLineEdit* nameEdit; +}; \ No newline at end of file diff --git a/include/panda_qt/elided_label.hpp b/include/panda_qt/elided_label.hpp new file mode 100644 index 00000000..9d937f9b --- /dev/null +++ b/include/panda_qt/elided_label.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include +#include +#include + +class ElidedLabel : public QLabel { + Q_OBJECT + public: + explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); + explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr); + void setText(QString text); + + protected: + void resizeEvent(QResizeEvent* event); + + private: + void updateText(); + QString m_text; + Qt::TextElideMode m_elideMode; +}; \ No newline at end of file diff --git a/include/panda_qt/main_window.hpp b/include/panda_qt/main_window.hpp index 7e93bdf6..eb6b30e0 100644 --- a/include/panda_qt/main_window.hpp +++ b/include/panda_qt/main_window.hpp @@ -17,7 +17,9 @@ #include "panda_qt/about_window.hpp" #include "panda_qt/cheats_window.hpp" #include "panda_qt/config_window.hpp" +#include "panda_qt/patch_window.hpp" #include "panda_qt/screen.hpp" +#include "panda_qt/shader_editor.hpp" #include "panda_qt/text_editor.hpp" #include "services/hid.hpp" @@ -47,6 +49,8 @@ class MainWindow : public QMainWindow { EditCheat, PressTouchscreen, ReleaseTouchscreen, + ReloadUbershader, + SetScreenSize, }; // Tagged union representing our message queue messages @@ -78,6 +82,11 @@ class MainWindow : public QMainWindow { u16 x; u16 y; } touchscreen; + + struct { + u32 width; + u32 height; + } screenSize; }; }; @@ -90,13 +99,15 @@ class MainWindow : public QMainWindow { std::mutex messageQueueMutex; std::vector messageQueue; + QMenuBar* menuBar = nullptr; InputMappings keyboardMappings; - ScreenWidget screen; + ScreenWidget* screen; AboutWindow* aboutWindow; ConfigWindow* configWindow; CheatsWindow* cheatsEditor; TextEditorWindow* luaEditor; - QMenuBar* menuBar = nullptr; + PatchWindow* patchWindow; + ShaderEditorWindow* shaderEditor; // We use SDL's game controller API since it's the sanest API that supports as many controllers as possible SDL_GameController* gameController = nullptr; @@ -108,17 +119,17 @@ class MainWindow : public QMainWindow { void selectROM(); void dumpDspFirmware(); void dumpRomFS(); - void openLuaEditor(); - void openCheatsEditor(); void showAboutMenu(); void initControllers(); void pollControllers(); + void setupControllerSensors(SDL_GameController* controller); void sendMessage(const EmulatorMessage& message); void dispatchMessage(const EmulatorMessage& message); // Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer bool usingGL = false; bool usingVk = false; + bool usingMtl = false; // Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard // This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state @@ -130,11 +141,15 @@ class MainWindow : public QMainWindow { MainWindow(QApplication* app, QWidget* parent = nullptr); ~MainWindow(); + void closeEvent(QCloseEvent* event) override; void keyPressEvent(QKeyEvent* event) override; void keyReleaseEvent(QKeyEvent* event) override; void mousePressEvent(QMouseEvent* event) override; void mouseReleaseEvent(QMouseEvent* event) override; void loadLuaScript(const std::string& code); + void reloadShader(const std::string& shader); void editCheat(u32 handle, const std::vector& cheat, const std::function& callback); + + void handleScreenResize(u32 width, u32 height); }; diff --git a/include/panda_qt/patch_window.hpp b/include/panda_qt/patch_window.hpp new file mode 100644 index 00000000..a6e1a129 --- /dev/null +++ b/include/panda_qt/patch_window.hpp @@ -0,0 +1,31 @@ +#pragma once +#include +#include +#include +#include + +#include "panda_qt/elided_label.hpp" + +class PatchWindow final : public QWidget { + Q_OBJECT + + public: + PatchWindow(QWidget* parent = nullptr); + ~PatchWindow() = default; + + private: + // Show a message box + // Title: Title of the message box to display + // Message: Message to display + // Icon: The type of icon (error, warning, information, etc) to display + // IconPath: If non-null, then a path to an icon in our assets to display on the OK button + void displayMessage( + const QString& title, const QString& message, QMessageBox::Icon icon = QMessageBox::Icon::Warning, const char* iconPath = nullptr + ); + + std::filesystem::path inputPath = ""; + std::filesystem::path patchPath = ""; + + ElidedLabel* inputPathLabel = nullptr; + ElidedLabel* patchPathLabel = nullptr; +}; diff --git a/include/panda_qt/screen.hpp b/include/panda_qt/screen.hpp index dcff3e90..1ed4966b 100644 --- a/include/panda_qt/screen.hpp +++ b/include/panda_qt/screen.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include "gl/context.h" @@ -10,15 +11,28 @@ class ScreenWidget : public QWidget { Q_OBJECT public: - ScreenWidget(QWidget* parent = nullptr); + using ResizeCallback = std::function; + + ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr); + void resizeEvent(QResizeEvent* event) override; + // Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context + void resizeSurface(u32 width, u32 height); + GL::Context* getGLContext() { return glContext.get(); } // Dimensions of our output surface u32 surfaceWidth = 0; u32 surfaceHeight = 0; + WindowInfo windowInfo; + + // Cached "previous" dimensions, used when resizing our window + u32 previousWidth = 0; + u32 previousHeight = 0; private: std::unique_ptr glContext = nullptr; + ResizeCallback resizeCallback; + bool createGLContext(); qreal devicePixelRatioFromScreen() const; diff --git a/include/panda_qt/shader_editor.hpp b/include/panda_qt/shader_editor.hpp new file mode 100644 index 00000000..86bc1149 --- /dev/null +++ b/include/panda_qt/shader_editor.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include + +#include "zep.h" +#include "zep/mode_repl.h" +#include "zep/regress.h" + +class ShaderEditorWindow : public QDialog { + Q_OBJECT + + private: + Zep::ZepWidget_Qt zepWidget; + Zep::IZepReplProvider replProvider; + static constexpr float fontSize = 14.0f; + + public: + // Whether this backend supports shader editor + bool supported = true; + + ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText); + void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); } + void setEnable(bool enable); +}; \ No newline at end of file diff --git a/include/panda_sdl/frontend_sdl.hpp b/include/panda_sdl/frontend_sdl.hpp index dd6ab6c0..cbd0b88e 100644 --- a/include/panda_sdl/frontend_sdl.hpp +++ b/include/panda_sdl/frontend_sdl.hpp @@ -23,6 +23,8 @@ class FrontendSDL { SDL_GameController* gameController = nullptr; InputMappings keyboardMappings; + u32 windowWidth = 400; + u32 windowHeight = 480; int gameControllerID; bool programRunning = true; @@ -35,4 +37,6 @@ class FrontendSDL { // And so the user can still use the keyboard to control the analog bool keyboardAnalogX = false; bool keyboardAnalogY = false; + + void setupControllerSensors(SDL_GameController* controller); }; \ No newline at end of file diff --git a/include/renderdoc.hpp b/include/renderdoc.hpp new file mode 100644 index 00000000..ea2c8a3d --- /dev/null +++ b/include/renderdoc.hpp @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once +#include + +#include "helpers.hpp" + +#ifdef PANDA3DS_ENABLE_RENDERDOC +namespace Renderdoc { + // Loads renderdoc dynamic library module. + void loadRenderdoc(); + + // Begins a capture if a renderdoc instance is attached. + void startCapture(); + + // Ends current renderdoc capture. + void endCapture(); + + // Triggers capturing process. + void triggerCapture(); + + // Sets output directory for captures + void setOutputDir(const std::string& path, const std::string& prefix); + + // Returns whether we've compiled with Renderdoc support + static constexpr bool isSupported() { return true; } +} // namespace Renderdoc +#else +namespace Renderdoc { + static void loadRenderdoc() {} + static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled") } + static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled") } + static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled") } + static void setOutputDir(const std::string& path, const std::string& prefix) {} + static constexpr bool isSupported() { return false; } +} // namespace Renderdoc +#endif + +namespace Renderdoc { + // RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture + struct Scope { + Scope() { Renderdoc::startCapture(); } + ~Scope() { Renderdoc::endCapture(); } + + Scope(const Scope&) = delete; + Scope& operator=(const Scope&) = delete; + + Scope(Scope&&) = delete; + Scope& operator=(const Scope&&) = delete; + }; + + // RAII scope class that encloses a Renderdoc capture. Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture + // trigger on its own and take a capture + struct InstantScope { + InstantScope() { + Renderdoc::triggerCapture(); + Renderdoc::startCapture(); + } + + ~InstantScope() { Renderdoc::endCapture(); } + + InstantScope(const InstantScope&) = delete; + InstantScope& operator=(const InstantScope&) = delete; + + InstantScope(InstantScope&&) = delete; + InstantScope& operator=(const InstantScope&&) = delete; + }; +} // namespace Renderdoc \ No newline at end of file diff --git a/include/renderer.hpp b/include/renderer.hpp index 8888b41e..bc5dfac6 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -1,8 +1,10 @@ #pragma once #include -#include #include +#include +#include +#include "PICA/draw_acceleration.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" #include "helpers.hpp" @@ -16,12 +18,16 @@ enum class RendererType : s8 { Null = 0, OpenGL = 1, Vulkan = 2, - Software = 3, + Metal = 3, + Software = 4, }; -class GPU; +struct EmulatorConfig; struct SDL_Window; +class GPU; +class ShaderUnit; + class Renderer { protected: GPU& gpu; @@ -45,6 +51,8 @@ class Renderer { u32 outputWindowWidth = 400; u32 outputWindowHeight = 240 * 2; + EmulatorConfig* emulatorConfig = nullptr; + public: Renderer(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); virtual ~Renderer(); @@ -66,6 +74,19 @@ class Renderer { // This function does things like write back or cache necessary state before we delete our context virtual void deinitGraphicsContext() = 0; + // Functions for hooking up the renderer core to the frontend's shader editor for editing ubershaders in real time + // SupportsShaderReload: Indicates whether the backend offers ubershader reload support or not + // GetUbershader/SetUbershader: Gets or sets the renderer's current ubershader + virtual bool supportsShaderReload() { return false; } + virtual std::string getUbershader() { return ""; } + virtual void setUbershader(const std::string& shader) {} + + // This function is called on every draw call before parsing vertex data. + // It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between + // ubershaders and shadergen, and so on. + // Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; } + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } @@ -91,4 +112,6 @@ class Renderer { outputWindowWidth = width; outputWindowHeight = height; } + + void setConfig(EmulatorConfig* config) { emulatorConfig = config; } }; diff --git a/include/renderer_gl/gl_driver.hpp b/include/renderer_gl/gl_driver.hpp new file mode 100644 index 00000000..a15c061f --- /dev/null +++ b/include/renderer_gl/gl_driver.hpp @@ -0,0 +1,12 @@ +#pragma once + +// Information about our OpenGL/OpenGL ES driver that we should keep track of +// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information +namespace OpenGL { + struct Driver { + bool supportsExtFbFetch = false; + bool supportsArmFbFetch = false; + + bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; } + }; +} // namespace OpenGL \ No newline at end of file diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 69960f1e..4085cabc 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -38,11 +38,14 @@ struct GLStateManager { GLuint stencilMask; GLuint boundVAO; - GLuint boundVBO; GLuint currentProgram; + GLuint boundUBO; GLenum depthFunc; GLenum logicOp; + GLenum blendEquationRGB, blendEquationAlpha; + GLenum blendFuncSourceRGB, blendFuncSourceAlpha; + GLenum blendFuncDestRGB, blendFuncDestAlpha; void reset(); void resetBlend(); @@ -51,7 +54,7 @@ struct GLStateManager { void resetColourMask(); void resetDepth(); void resetVAO(); - void resetVBO(); + void resetBuffers(); void resetProgram(); void resetScissor(); void resetStencil(); @@ -169,13 +172,6 @@ struct GLStateManager { } } - void bindVBO(GLuint handle) { - if (boundVBO != handle) { - boundVBO = handle; - glBindBuffer(GL_ARRAY_BUFFER, handle); - } - } - void useProgram(GLuint handle) { if (currentProgram != handle) { currentProgram = handle; @@ -183,8 +179,14 @@ struct GLStateManager { } } + void bindUBO(GLuint handle) { + if (boundUBO != handle) { + boundUBO = handle; + glBindBuffer(GL_UNIFORM_BUFFER, boundUBO); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } - void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } void setColourMask(bool r, bool g, bool b, bool a) { @@ -224,6 +226,41 @@ struct GLStateManager { } void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } + + // Counterpart to glBlendEquationSeparate + void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) { + if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) { + blendEquationRGB = modeRGB; + blendEquationAlpha = modeAlpha; + + glBlendEquationSeparate(modeRGB, modeAlpha); + } + } + + // Counterpart to glBlendFuncSeparate + void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) { + if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha || + blendFuncDestAlpha != destAlpha) { + + blendFuncSourceRGB = sourceRGB; + blendFuncDestRGB = destRGB; + blendFuncSourceAlpha = sourceAlpha; + blendFuncDestAlpha = destAlpha; + + glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha); + } + } + + // Counterpart to regular glBlendEquation + void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); } + + void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) { + setBlendEquation(static_cast(modeRGB), static_cast(modeAlpha)); + } + + void setBlendEquation(OpenGL::BlendEquation mode) { + setBlendEquation(static_cast(mode)); + } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 92f02662..fab239f2 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,11 +1,23 @@ #pragma once #include +#include +#include +#include +#include #include +#include +#include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_config.hpp" +#include "PICA/pica_hash.hpp" +#include "PICA/pica_vert_config.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" +#include "gl/stream_buffer.h" +#include "gl_driver.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" @@ -22,27 +34,48 @@ class RendererGL final : public Renderer { OpenGL::Program triangleProgram; OpenGL::Program displayProgram; - OpenGL::VertexArray vao; + // VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes + OpenGL::VertexArray defaultVAO; + // VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing. + OpenGL::VertexArray hwShaderVAO; OpenGL::VertexBuffer vbo; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; bool oldDepthmapEnable = false; + // Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader + bool usingAcceleratedShader = false; + bool performIndexedRender = false; + bool usingShortIndices = false; + + // Set by prepareForDraw, metadata for indexed renders + GLuint minimumIndex = 0; + GLuint maximumIndex = 0; + void* hwIndexBufferOffset = nullptr; + + // When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw + u32 previousAttributeMask = 0; + + // Cached pointer to the current vertex shader when using HW accelerated shaders + OpenGL::Shader* generatedVertexShader = nullptr; SurfaceCache depthBufferCache; SurfaceCache colourBufferCache; @@ -53,25 +86,81 @@ class RendererGL final : public Renderer { OpenGL::VertexBuffer dummyVBO; OpenGL::Texture screenTexture; - GLuint lightLUTTextureArray; + OpenGL::Texture LUTTexture; OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + // The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation + // We can compile this once and then link it with all other generated fragment shaders + OpenGL::Shader defaultShadergenVs; + GLuint shadergenFragmentUBO; + // UBO for uploading the PICA uniforms when using hw shaders + GLuint hwShaderUniformUBO; + + using StreamBuffer = OpenGLStreamBuffer; + std::unique_ptr hwVertexBuffer; + std::unique_ptr hwIndexBuffer; + + // Cache of fixed attribute values so that we don't do any duplicate updates + std::array, 16> fixedAttrValues; + + // Cached recompiled fragment shader + struct CachedProgram { + OpenGL::Program program; + }; + + struct ShaderCache { + std::unordered_map> vertexShaderCache; + std::unordered_map fragmentShaderCache; + + // Program cache indexed by GLuints for the vertex and fragment shader to use + // Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint + std::unordered_map programCache; + + void clear() { + for (auto& it : programCache) { + CachedProgram& cachedProgram = it.second; + cachedProgram.program.free(); + } + + for (auto& it : vertexShaderCache) { + if (it.second.has_value()) { + it.second->free(); + } + } + + for (auto& it : fragmentShaderCache) { + it.second.free(); + } + + programCache.clear(); + vertexShaderCache.clear(); + fragmentShaderCache.clear(); + } + }; + ShaderCache shaderCache; OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + OpenGL::Program& getSpecializedShader(); + + PICA::ShaderGen::FragmentGenerator fragShaderGen; + OpenGL::Driver driverInfo; MAKE_LOG_FUNCTION(log, rendererLogger) void setupBlending(); void setupStencilTest(bool stencilEnable); void bindDepthBuffer(); - void setupTextureEnvState(); + void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); + void updateFogLUT(); void initGraphicsContextInternal(); + void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel); + public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) - : Renderer(gpu, internalRegs, externalRegs) {} + : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} ~RendererGL() override; void reset() override; @@ -82,12 +171,18 @@ class RendererGL final : public Renderer { void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; void drawVertices(PICA::PrimType primType, std::span vertices) override; // Draw the given vertices void deinitGraphicsContext() override; + + virtual bool supportsShaderReload() override { return true; } + virtual std::string getUbershader() override; + virtual void setUbershader(const std::string& shader) override; + virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override; std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Note: The caller is responsible for deleting the currently bound FBO before calling this void setFBO(uint handle) { screenFramebuffer.m_handle = handle; } void resetStateManager() { gl.reset(); } + void initUbershader(OpenGL::Program& program); #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); } @@ -95,4 +190,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/include/renderer_gl/surface_cache.hpp b/include/renderer_gl/surface_cache.hpp index 5323741f..fb7c71a5 100644 --- a/include/renderer_gl/surface_cache.hpp +++ b/include/renderer_gl/surface_cache.hpp @@ -19,8 +19,6 @@ template class SurfaceCache { // Vanilla std::optional can't hold actual references using OptionalRef = std::optional>; - static_assert(std::is_same() || std::is_same() || - std::is_same(), "Invalid surface type"); size_t size; size_t evictionIndex; diff --git a/include/renderer_mtl/mtl_blit_pipeline_cache.hpp b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp new file mode 100644 index 00000000..1fa47f42 --- /dev/null +++ b/include/renderer_mtl/mtl_blit_pipeline_cache.hpp @@ -0,0 +1,74 @@ +#pragma once + +#include + +#include "objc_helper.hpp" +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + struct BlitPipelineHash { + // Formats + ColorFmt colorFmt; + DepthFmt depthFmt; + }; + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class BlitPipelineCache { + public: + BlitPipelineCache() = default; + + ~BlitPipelineCache() { + reset(); + vertexFunction->release(); + fragmentFunction->release(); + } + + void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) { + device = dev; + vertexFunction = vert; + fragmentFunction = frag; + } + + MTL::RenderPipelineState* get(BlitPipelineHash hash) { + u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt; + auto& pipeline = pipelineCache[intHash]; + if (!pipeline) { + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + + desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt)); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Blit pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + } + + private: + std::map pipelineCache; + + MTL::Device* device; + MTL::Function* vertexFunction; + MTL::Function* fragmentFunction; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_command_encoder.hpp b/include/renderer_mtl/mtl_command_encoder.hpp new file mode 100644 index 00000000..562e6b79 --- /dev/null +++ b/include/renderer_mtl/mtl_command_encoder.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include + +namespace Metal { + struct RenderState { + MTL::RenderPipelineState* renderPipelineState = nullptr; + MTL::DepthStencilState* depthStencilState = nullptr; + MTL::Texture* textures[3] = {nullptr}; + MTL::SamplerState* samplerStates[3] = {nullptr}; + }; + + class CommandEncoder { + public: + void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) { + renderCommandEncoder = rce; + + // Reset the render state + renderState = RenderState{}; + } + + // Resource binding + void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) { + if (renderPipelineState != renderState.renderPipelineState) { + renderCommandEncoder->setRenderPipelineState(renderPipelineState); + renderState.renderPipelineState = renderPipelineState; + } + } + + void setDepthStencilState(MTL::DepthStencilState* depthStencilState) { + if (depthStencilState != renderState.depthStencilState) { + renderCommandEncoder->setDepthStencilState(depthStencilState); + renderState.depthStencilState = depthStencilState; + } + } + + void setFragmentTexture(MTL::Texture* texture, u32 index) { + if (texture != renderState.textures[index]) { + renderCommandEncoder->setFragmentTexture(texture, index); + renderState.textures[index] = texture; + } + } + + void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) { + if (samplerState != renderState.samplerStates[index]) { + renderCommandEncoder->setFragmentSamplerState(samplerState, index); + renderState.samplerStates[index] = samplerState; + } + } + + private: + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + + RenderState renderState; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_common.hpp b/include/renderer_mtl/mtl_common.hpp new file mode 100644 index 00000000..a148520f --- /dev/null +++ b/include/renderer_mtl/mtl_common.hpp @@ -0,0 +1,6 @@ +#pragma once + +#include + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) diff --git a/include/renderer_mtl/mtl_depth_stencil_cache.hpp b/include/renderer_mtl/mtl_depth_stencil_cache.hpp new file mode 100644 index 00000000..8f7256a9 --- /dev/null +++ b/include/renderer_mtl/mtl_depth_stencil_cache.hpp @@ -0,0 +1,80 @@ +#pragma once + +#include + +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + struct DepthStencilHash { + u32 stencilConfig; + u16 stencilOpConfig; + bool depthStencilWrite; + u8 depthFunc; + }; + + class DepthStencilCache { + public: + DepthStencilCache() = default; + + ~DepthStencilCache() { reset(); } + + void set(MTL::Device* dev) { device = dev; } + + MTL::DepthStencilState* get(DepthStencilHash hash) { + u64 intHash = + ((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig; + auto& depthStencilState = depthStencilCache[intHash]; + if (!depthStencilState) { + MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(hash.depthStencilWrite); + desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc)); + + const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig); + MTL::StencilDescriptor* stencilDesc = nullptr; + if (stencilEnable) { + const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig); + const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig); + + const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0; + + const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig); + const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig); + const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig); + + stencilDesc = MTL::StencilDescriptor::alloc()->init(); + stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp)); + stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp)); + stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp)); + stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc)); + stencilDesc->setReadMask(stencilRefMask); + stencilDesc->setWriteMask(stencilBufferMask); + + desc->setFrontFaceStencil(stencilDesc); + desc->setBackFaceStencil(stencilDesc); + } + + depthStencilState = device->newDepthStencilState(desc); + + desc->release(); + if (stencilDesc) { + stencilDesc->release(); + } + } + + return depthStencilState; + } + + void reset() { + for (auto& pair : depthStencilCache) { + pair.second->release(); + } + depthStencilCache.clear(); + } + + private: + std::map depthStencilCache; + MTL::Device* device; + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_draw_pipeline_cache.hpp b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp new file mode 100644 index 00000000..7178785e --- /dev/null +++ b/include/renderer_mtl/mtl_draw_pipeline_cache.hpp @@ -0,0 +1,162 @@ +#pragma once + +#include + +#include "objc_helper.hpp" +#include "pica_to_mtl.hpp" + +using namespace PICA; + +namespace Metal { + struct DrawFragmentFunctionHash { + u32 lightingConfig1; // 32 bits (TODO: check this) + bool lightingEnabled; // 1 bit + u8 lightingNumLights; // 3 bits + // | ref | func | on | + u16 alphaControl; // 12 bits (mask: 11111111 0111 0001) + }; + + inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) { + if (!l.lightingEnabled && r.lightingEnabled) return true; + if (l.lightingNumLights < r.lightingNumLights) return true; + if (l.lightingConfig1 < r.lightingConfig1) return true; + if (l.alphaControl < r.alphaControl) return true; + + return false; + } + + struct DrawPipelineHash { // 56 bits + // Formats + ColorFmt colorFmt; // 3 bits + DepthFmt depthFmt; // 3 bits + + // Blending + bool blendEnabled; // 1 bit + // | functions | aeq | ceq | + u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111) + u8 colorWriteMask; // 4 bits + + DrawFragmentFunctionHash fragHash; + }; + + inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) { + if ((u32)l.colorFmt < (u32)r.colorFmt) return true; + if ((u32)l.depthFmt < (u32)r.depthFmt) return true; + if (!l.blendEnabled && r.blendEnabled) return true; + if (l.blendControl < r.blendControl) return true; + if (l.colorWriteMask < r.colorWriteMask) return true; + if (l.fragHash < r.fragHash) return true; + + return false; + } + + // This pipeline only caches the pipeline with all of its color and depth attachment variations + class DrawPipelineCache { + public: + DrawPipelineCache() = default; + + ~DrawPipelineCache() { + reset(); + vertexDescriptor->release(); + vertexFunction->release(); + } + + void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) { + device = dev; + library = lib; + vertexFunction = vert; + vertexDescriptor = vertDesc; + } + + MTL::RenderPipelineState* get(DrawPipelineHash hash) { + auto& pipeline = pipelineCache[hash]; + + if (!pipeline) { + auto& fragmentFunction = fragmentFunctionCache[hash.fragHash]; + if (!fragmentFunction) { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0)); + constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1)); + constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2)); + constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3)); + + NS::Error* error = nullptr; + fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + } + + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(vertexFunction); + desc->setFragmentFunction(fragmentFunction); + desc->setVertexDescriptor(vertexDescriptor); + + auto colorAttachment = desc->colorAttachments()->object(0); + colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt)); + MTL::ColorWriteMask writeMask = 0; + if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed; + if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen; + if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue; + if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha; + colorAttachment->setWriteMask(writeMask); + if (hash.blendEnabled) { + const u8 rgbEquation = hash.blendControl & 0x7; + const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl); + + // Get blending functions + const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl); + const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl); + const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl); + const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl); + + colorAttachment->setBlendingEnabled(true); + colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation)); + colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation)); + colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc)); + colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc)); + colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc)); + colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc)); + } + + MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt); + desc->setDepthAttachmentPixelFormat(depthFormat); + if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat); + + NS::Error* error = nullptr; + desc->setLabel(toNSString("Draw pipeline")); + pipeline = device->newRenderPipelineState(desc, &error); + if (error) { + Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + desc->release(); + } + + return pipeline; + } + + void reset() { + for (auto& pair : pipelineCache) { + pair.second->release(); + } + pipelineCache.clear(); + + for (auto& pair : fragmentFunctionCache) { + pair.second->release(); + } + fragmentFunctionCache.clear(); + } + + private: + std::map pipelineCache; + std::map fragmentFunctionCache; + + MTL::Device* device; + MTL::Library* library; + MTL::Function* vertexFunction; + MTL::VertexDescriptor* vertexDescriptor; + }; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_lut_texture.hpp b/include/renderer_mtl/mtl_lut_texture.hpp new file mode 100644 index 00000000..531dc73c --- /dev/null +++ b/include/renderer_mtl/mtl_lut_texture.hpp @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace Metal { + +class LutTexture { +public: + LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name); + ~LutTexture(); + u32 getNextIndex(); + + MTL::Texture* getTexture() { return texture; } + u32 getCurrentIndex() { return currentIndex; } +private: + MTL::Texture* texture; + u32 currentIndex = 0; +}; + +} // namespace Metal diff --git a/include/renderer_mtl/mtl_render_target.hpp b/include/renderer_mtl/mtl_render_target.hpp new file mode 100644 index 00000000..8f80ea64 --- /dev/null +++ b/include/renderer_mtl/mtl_render_target.hpp @@ -0,0 +1,91 @@ +#pragma once +#include +#include +#include + +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "objc_helper.hpp" +#include "opengl.hpp" +#include "pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + template + struct RenderTarget { + MTL::Device* device; + + u32 location; + Format_t format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + MTL::Texture* texture = nullptr; + + RenderTarget() : valid(false) {} + + RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + Math::Rect getSubRect(u32 inputAddress, u32 width, u32 height) { + const u32 startOffset = (inputAddress - location) / sizePerPixel(format); + const u32 x0 = (startOffset % (size.x() * 8)) / 8; + const u32 y0 = (startOffset / (size.x() * 8)) * 8; + return Math::Rect{x0, size.y() - y0, x0 + width, size.y() - height - y0}; + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(RenderTarget& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate() { + MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid; + if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format); + } else if (std::is_same::value) { + pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format); + } else { + panic("Invalid format type"); + } + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModePrivate); + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + std::string(std::is_same::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" + + std::to_string(size.v()) + )); + descriptor->release(); + } + + void free() { + valid = false; + + if (texture) { + texture->release(); + } + } + + u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); } + }; + + using ColorRenderTarget = RenderTarget; + using DepthStencilRenderTarget = RenderTarget; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp new file mode 100644 index 00000000..93103091 --- /dev/null +++ b/include/renderer_mtl/mtl_texture.hpp @@ -0,0 +1,73 @@ +#pragma once + +#include +#include +#include + +#include "PICA/regs.hpp" +#include "boost/icl/interval.hpp" +#include "helpers.hpp" +#include "math_util.hpp" +#include "opengl.hpp" +#include "renderer_mtl/pica_to_mtl.hpp" + +template +using Interval = boost::icl::right_open_interval; + +namespace Metal { + struct Texture { + MTL::Device* device; + + u32 location; + u32 config; // Magnification/minification filter, wrapping configs, etc + PICA::TextureFmt format; + OpenGL::uvec2 size; + bool valid; + + // Range of VRAM taken up by buffer + Interval range; + + PICA::PixelFormatInfo formatInfo; + MTL::Texture* texture = nullptr; + MTL::SamplerState* sampler = nullptr; + + Texture() : valid(false) {} + + Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true) + : device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) { + u64 endLoc = (u64)loc + sizeInBytes(); + // Check if start and end are valid here + range = Interval(loc, (u32)endLoc); + } + + // For 2 textures to "match" we only care about their locations, formats, and dimensions to match + // For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture + bool matches(Texture& other) { + return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y(); + } + + void allocate(); + void setNewConfig(u32 newConfig); + void decodeTexture(std::span data); + void free(); + u64 sizeInBytes(); + + u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); + + // Get the morton interleave offset of a texel based on its U and V values + static u32 mortonInterleave(u32 u, u32 v); + // Get the byte offset of texel (u, v) in the texture + static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); + static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + + // Returns the format of this texture as a string + std::string_view formatToString() { return PICA::textureFormatToString(format); } + + // Returns the texel at coordinates (u, v) of an ETC1(A4) texture + // TODO: Make hasAlpha a template parameter + u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); + u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); + }; +} // namespace Metal diff --git a/include/renderer_mtl/mtl_vertex_buffer_cache.hpp b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp new file mode 100644 index 00000000..b392389c --- /dev/null +++ b/include/renderer_mtl/mtl_vertex_buffer_cache.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include + +#include "helpers.hpp" +#include "pica_to_mtl.hpp" + + +using namespace PICA; + +namespace Metal { + struct BufferHandle { + MTL::Buffer* buffer; + usize offset; + }; + + class VertexBufferCache { + // 128MB buffer for caching vertex data + static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024; + + public: + VertexBufferCache() = default; + + ~VertexBufferCache() { + endFrame(); + buffer->release(); + } + + void set(MTL::Device* dev) { + device = dev; + create(); + } + + void endFrame() { + ptr = 0; + for (auto buffer : additionalAllocations) { + buffer->release(); + } + additionalAllocations.clear(); + } + + BufferHandle get(const void* data, usize size) { + // If the vertex buffer is too large, just create a new one + if (ptr + size > CACHE_BUFFER_SIZE) { + MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared); + newBuffer->setLabel(toNSString("Additional vertex buffer")); + additionalAllocations.push_back(newBuffer); + Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer"); + + return BufferHandle{newBuffer, 0}; + } + + // Copy the data into the buffer + std::memcpy((char*)buffer->contents() + ptr, data, size); + + auto oldPtr = ptr; + ptr += size; + + return BufferHandle{buffer, oldPtr}; + } + + void reset() { + endFrame(); + + if (buffer) { + buffer->release(); + create(); + } + } + + private: + MTL::Buffer* buffer = nullptr; + usize ptr = 0; + std::vector additionalAllocations; + + MTL::Device* device; + + void create() { + buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared); + buffer->setLabel(toNSString("Shared vertex buffer")); + } + }; +} // namespace Metal diff --git a/include/renderer_mtl/objc_helper.hpp b/include/renderer_mtl/objc_helper.hpp new file mode 100644 index 00000000..86992f1d --- /dev/null +++ b/include/renderer_mtl/objc_helper.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include + +#include "mtl_common.hpp" + +namespace Metal { + dispatch_data_t createDispatchData(const void* data, size_t size); +} // namespace Metal + +// Cast from std::string to NS::String* +inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); } \ No newline at end of file diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp new file mode 100644 index 00000000..715088b4 --- /dev/null +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -0,0 +1,152 @@ +#pragma once + +#include + +#include "PICA/regs.hpp" + +namespace PICA { + struct PixelFormatInfo { + MTL::PixelFormat pixelFormat; + size_t bytesPerTexel; + }; + + constexpr PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 + {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 + {MTL::PixelFormatRG8Unorm, 2}, // RG8 + {MTL::PixelFormatRGBA8Unorm, 4}, // I8 + {MTL::PixelFormatA8Unorm, 1}, // A8 + {MTL::PixelFormatABGR4Unorm, 2}, // IA4 + {MTL::PixelFormatABGR4Unorm, 2}, // I4 + {MTL::PixelFormatA8Unorm, 1}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 + }; + + inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } + + inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) { + switch (format) { + case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm; + case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm? + case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm? + case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm; + } + } + + inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) { + switch (format) { + case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm; + case DepthFmt::Unknown1: return MTL::PixelFormatInvalid; + case DepthFmt::Depth24: + return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats + // Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead + case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8; + } + } + + inline MTL::CompareFunction toMTLCompareFunc(u8 func) { + switch (func) { + case 0: return MTL::CompareFunctionNever; + case 1: return MTL::CompareFunctionAlways; + case 2: return MTL::CompareFunctionEqual; + case 3: return MTL::CompareFunctionNotEqual; + case 4: return MTL::CompareFunctionLess; + case 5: return MTL::CompareFunctionLessEqual; + case 6: return MTL::CompareFunctionGreater; + case 7: return MTL::CompareFunctionGreaterEqual; + default: Helpers::panic("Unknown compare function %u", func); + } + + return MTL::CompareFunctionAlways; + } + + inline MTL::BlendOperation toMTLBlendOperation(u8 op) { + switch (op) { + case 0: return MTL::BlendOperationAdd; + case 1: return MTL::BlendOperationSubtract; + case 2: return MTL::BlendOperationReverseSubtract; + case 3: return MTL::BlendOperationMin; + case 4: return MTL::BlendOperationMax; + case 5: return MTL::BlendOperationAdd; // Unused (same as 0) + case 6: return MTL::BlendOperationAdd; // Unused (same as 0) + case 7: return MTL::BlendOperationAdd; // Unused (same as 0) + default: Helpers::panic("Unknown blend operation %u", op); + } + + return MTL::BlendOperationAdd; + } + + inline MTL::BlendFactor toMTLBlendFactor(u8 factor) { + switch (factor) { + case 0: return MTL::BlendFactorZero; + case 1: return MTL::BlendFactorOne; + case 2: return MTL::BlendFactorSourceColor; + case 3: return MTL::BlendFactorOneMinusSourceColor; + case 4: return MTL::BlendFactorDestinationColor; + case 5: return MTL::BlendFactorOneMinusDestinationColor; + case 6: return MTL::BlendFactorSourceAlpha; + case 7: return MTL::BlendFactorOneMinusSourceAlpha; + case 8: return MTL::BlendFactorDestinationAlpha; + case 9: return MTL::BlendFactorOneMinusDestinationAlpha; + case 10: return MTL::BlendFactorBlendColor; + case 11: return MTL::BlendFactorOneMinusBlendColor; + case 12: return MTL::BlendFactorBlendAlpha; + case 13: return MTL::BlendFactorOneMinusBlendAlpha; + case 14: return MTL::BlendFactorSourceAlphaSaturated; + case 15: return MTL::BlendFactorOne; // Undocumented + default: Helpers::panic("Unknown blend factor %u", factor); + } + + return MTL::BlendFactorOne; + } + + inline MTL::StencilOperation toMTLStencilOperation(u8 op) { + switch (op) { + case 0: return MTL::StencilOperationKeep; + case 1: return MTL::StencilOperationZero; + case 2: return MTL::StencilOperationReplace; + case 3: return MTL::StencilOperationIncrementClamp; + case 4: return MTL::StencilOperationDecrementClamp; + case 5: return MTL::StencilOperationInvert; + case 6: return MTL::StencilOperationIncrementWrap; + case 7: return MTL::StencilOperationDecrementWrap; + default: Helpers::panic("Unknown stencil operation %u", op); + } + + return MTL::StencilOperationKeep; + } + + inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) { + switch (primType) { + case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle; + case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip; + case PrimType::TriangleFan: + Helpers::warn("Triangle fans are not supported on Metal, using triangles instead"); + return MTL::PrimitiveTypeTriangle; + case PrimType::GeometryPrimitive: + return MTL::PrimitiveTypeTriangle; + } + } + + inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) { + switch (addrMode) { + case 0: return MTL::SamplerAddressModeClampToEdge; + case 1: return MTL::SamplerAddressModeClampToBorderColor; + case 2: return MTL::SamplerAddressModeRepeat; + case 3: return MTL::SamplerAddressModeMirrorRepeat; + case 4: return MTL::SamplerAddressModeClampToEdge; + case 5: return MTL::SamplerAddressModeClampToBorderColor; + case 6: return MTL::SamplerAddressModeRepeat; + case 7: return MTL::SamplerAddressModeRepeat; + default: Helpers::panic("Unknown sampler address mode %u", addrMode); + } + + return MTL::SamplerAddressModeClampToEdge; + } +} // namespace PICA diff --git a/include/renderer_mtl/renderer_mtl.hpp b/include/renderer_mtl/renderer_mtl.hpp new file mode 100644 index 00000000..bd5c3bf1 --- /dev/null +++ b/include/renderer_mtl/renderer_mtl.hpp @@ -0,0 +1,207 @@ +#pragma once + +#include +#include + +#include "mtl_blit_pipeline_cache.hpp" +#include "mtl_command_encoder.hpp" +#include "mtl_depth_stencil_cache.hpp" +#include "mtl_draw_pipeline_cache.hpp" +#include "mtl_lut_texture.hpp" +#include "mtl_render_target.hpp" +#include "mtl_texture.hpp" +#include "mtl_vertex_buffer_cache.hpp" +#include "renderer.hpp" + + +// HACK: use the OpenGL cache +#include "../renderer_gl/surface_cache.hpp" + +class GPU; + +struct Color4 { + float r, g, b, a; +}; + +class RendererMTL final : public Renderer { + public: + RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs); + ~RendererMTL() override; + + void reset() override; + void display() override; + void initGraphicsContext(SDL_Window* window) override; + void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; + void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; + void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override; + void drawVertices(PICA::PrimType primType, std::span vertices) override; + void screenshot(const std::string& name) override; + void deinitGraphicsContext() override; + +#ifdef PANDA3DS_FRONTEND_QT + virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {} +#endif + + private: + CA::MetalLayer* metalLayer; + + MTL::Device* device; + MTL::CommandQueue* commandQueue; + + Metal::CommandEncoder commandEncoder; + + // Libraries + MTL::Library* library; + + // Caches + SurfaceCache colorRenderTargetCache; + SurfaceCache depthStencilRenderTargetCache; + SurfaceCache textureCache; + Metal::BlitPipelineCache blitPipelineCache; + Metal::DrawPipelineCache drawPipelineCache; + Metal::DepthStencilCache depthStencilCache; + Metal::VertexBufferCache vertexBufferCache; + + // Resources + MTL::SamplerState* nearestSampler; + MTL::SamplerState* linearSampler; + MTL::Texture* nullTexture; + MTL::DepthStencilState* defaultDepthStencilState; + + Metal::LutTexture* lutLightingTexture; + Metal::LutTexture* lutFogTexture; + + // Pipelines + MTL::RenderPipelineState* displayPipeline; + // MTL::RenderPipelineState* copyToLutTexturePipeline; + + // Clears + std::map colorClearOps; + std::map depthClearOps; + std::map stencilClearOps; + + // Active state + MTL::CommandBuffer* commandBuffer = nullptr; + MTL::RenderCommandEncoder* renderCommandEncoder = nullptr; + MTL::Texture* lastColorTexture = nullptr; + MTL::Texture* lastDepthTexture = nullptr; + + // Debug + std::string nextRenderPassName; + + void createCommandBufferIfNeeded() { + if (!commandBuffer) { + commandBuffer = commandQueue->commandBuffer(); + } + } + + void endRenderPass() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder = nullptr; + } + } + + void beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr + ); + + void commitCommandBuffer() { + if (renderCommandEncoder) { + renderCommandEncoder->endEncoding(); + renderCommandEncoder->release(); + renderCommandEncoder = nullptr; + } + if (commandBuffer) { + commandBuffer->commit(); + // HACK + commandBuffer->waitUntilCompleted(); + commandBuffer->release(); + commandBuffer = nullptr; + } + } + + template + inline void clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment, + SetClearDataT setClearData + ) { + bool beginRenderPass = (renderPassDescriptor == nullptr); + if (!renderPassDescriptor) { + renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + } + + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + setClearData(attachment, clearData); + attachment->setLoadAction(MTL::LoadActionClear); + attachment->setStoreAction(MTL::StoreActionStore); + + if (beginRenderPass) { + if (std::is_same::value) + beginRenderPassIfNeeded(renderPassDescriptor, true, texture); + else + beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture); + } + } + + template + inline bool clearAttachment( + MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map& clearOps, + GetAttachmentT getAttachment, SetClearDataT setClearData + ) { + auto it = clearOps.find(texture); + if (it != clearOps.end()) { + clearAttachment(renderPassDescriptor, texture, it->second, getAttachment, setClearData); + clearOps.erase(it); + return true; + } + + if (renderPassDescriptor) { + AttachmentT* attachment = getAttachment(renderPassDescriptor); + attachment->setTexture(texture); + attachment->setLoadAction(MTL::LoadActionLoad); + attachment->setStoreAction(MTL::StoreActionStore); + } + + return false; + } + + bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, colorClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); }, + [](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); } + ); + } + + bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, depthClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); }, + [](auto attachment, auto& depth) { attachment->setClearDepth(depth); } + ); + } + + bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) { + return clearAttachment( + renderPassDescriptor, texture, stencilClearOps, + [](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); }, + [](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); } + ); + } + + std::optional getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true + ); + Metal::DepthStencilRenderTarget& getDepthRenderTarget(); + Metal::Texture& getTexture(Metal::Texture& tex); + void setupTextureEnvState(MTL::RenderCommandEncoder* encoder); + void bindTexturesToSlots(); + void updateLightingLUT(MTL::RenderCommandEncoder* encoder); + void updateFogLUT(MTL::RenderCommandEncoder* encoder); + void textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect + ); +}; diff --git a/include/scheduler.hpp b/include/scheduler.hpp index 97c50afc..cfc4d5e8 100644 --- a/include/scheduler.hpp +++ b/include/scheduler.hpp @@ -11,7 +11,8 @@ struct Scheduler { VBlank = 0, // End of frame event UpdateTimers = 1, // Update kernel timer objects RunDSP = 2, // Make the emulated DSP run for one audio frame - Panic = 3, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX) + SignalY2R = 3, // Signal that a Y2R conversion has finished + Panic = 4, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX) TotalNumberOfEvents // How many event types do we have in total? }; static constexpr usize totalNumberOfEvents = static_cast(EventType::TotalNumberOfEvents); diff --git a/include/sdl_sensors.hpp b/include/sdl_sensors.hpp new file mode 100644 index 00000000..e34721af --- /dev/null +++ b/include/sdl_sensors.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +#include "helpers.hpp" +#include "services/hid.hpp" + +// Convert SDL sensor readings to 3DS format +// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for +// rotation) +namespace Sensors::SDL { + // Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID + // Returns [pitch, roll, yaw] + static glm::vec3 convertRotation(glm::vec3 rotation) { + // Annoyingly, Android doesn't support the header yet so we define pi ourselves + static constexpr double pi = 3.141592653589793; + // Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID + constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff; + // The axes are also inverted, so invert scale before the multiplication. + return rotation * -scale; + } + + static glm::vec3 convertAcceleration(float* data) { + // Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930 + // At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity. + // This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4. + static constexpr float accelMax = 9.f; + // We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too. + static constexpr float standardGravity = 9.80665f; + + s16 x = std::clamp(s16(data[0] / accelMax * 930.f), -930, +930); + s16 y = std::clamp(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930); + s16 z = std::clamp(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930); + + return glm::vec3(x, y, z); + } +} // namespace Sensors::SDL diff --git a/include/services/ac.hpp b/include/services/ac.hpp index 4ba53033..56acd436 100644 --- a/include/services/ac.hpp +++ b/include/services/ac.hpp @@ -8,6 +8,8 @@ #include "result/result.hpp" class ACService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::AC; Memory& mem; MAKE_LOG_FUNCTION(log, acLogger) diff --git a/include/services/act.hpp b/include/services/act.hpp index 92c69c60..3fe68993 100644 --- a/include/services/act.hpp +++ b/include/services/act.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class ACTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::ACT; Memory& mem; MAKE_LOG_FUNCTION(log, actLogger) @@ -15,7 +17,7 @@ class ACTService { void generateUUID(u32 messagePointer); void getAccountDataBlock(u32 messagePointer); -public: + public: ACTService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/am.hpp b/include/services/am.hpp index 672909ff..f72a5efc 100644 --- a/include/services/am.hpp +++ b/include/services/am.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class AMService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::AM; Memory& mem; MAKE_LOG_FUNCTION(log, amLogger) @@ -15,7 +17,7 @@ class AMService { void getPatchTitleInfo(u32 messagePointer); void listTitleInfo(u32 messagePointer); -public: + public: AMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/apt.hpp b/include/services/apt.hpp index 48a59c2d..624151c1 100644 --- a/include/services/apt.hpp +++ b/include/services/apt.hpp @@ -12,7 +12,8 @@ class Kernel; enum class ConsoleModel : u32 { - Old3DS, New3DS + Old3DS, + New3DS, }; // https://www.3dbrew.org/wiki/NS_and_APT_Services#Command @@ -41,6 +42,8 @@ namespace APT::Transitions { } class APTService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::APT; Memory& mem; Kernel& kernel; @@ -99,7 +102,7 @@ class APTService { u32 screencapPostPermission; -public: + public: APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/boss.hpp b/include/services/boss.hpp index 769184e5..edc50dee 100644 --- a/include/services/boss.hpp +++ b/include/services/boss.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class BOSSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::BOSS; Memory& mem; MAKE_LOG_FUNCTION(log, bossLogger) @@ -17,7 +19,7 @@ class BOSSService { void getNewArrivalFlag(u32 messagePointer); void getNsDataIdList(u32 messagePointer, u32 commandWord); void getOptoutFlag(u32 messagePointer); - void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra + void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra void getTaskIdList(u32 messagePointer); void getTaskInfo(u32 messagePointer); void getTaskServiceStatus(u32 messagePointer); @@ -35,7 +37,8 @@ class BOSSService { void unregisterTask(u32 messagePointer); s8 optoutFlag; -public: + + public: BOSSService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cam.hpp b/include/services/cam.hpp index 60ede3b9..e5254997 100644 --- a/include/services/cam.hpp +++ b/include/services/cam.hpp @@ -12,6 +12,7 @@ class Kernel; class CAMService { + using Handle = HorizonHandle; using Event = std::optional; struct Port { diff --git a/include/services/cecd.hpp b/include/services/cecd.hpp index 656e38ad..4612c17b 100644 --- a/include/services/cecd.hpp +++ b/include/services/cecd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -9,6 +10,8 @@ class Kernel; class CECDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CECD; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class CECDService { void getInfoEventHandle(u32 messagePointer); void openAndRead(u32 messagePointer); -public: + public: CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/cfg.hpp b/include/services/cfg.hpp index 7241a409..e2ddffa8 100644 --- a/include/services/cfg.hpp +++ b/include/services/cfg.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "logger.hpp" #include "memory.hpp" @@ -7,8 +8,10 @@ #include "result/result.hpp" class CFGService { + using Handle = HorizonHandle; + Memory& mem; - CountryCodes country = CountryCodes::US; // Default to USA + CountryCodes country = CountryCodes::US; // Default to USA MAKE_LOG_FUNCTION(log, cfgLogger) void writeStringU16(u32 pointer, const std::u16string& string); @@ -27,12 +30,12 @@ class CFGService { void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask); -public: + public: enum class Type { - U, // cfg:u - I, // cfg:i - S, // cfg:s - NOR, // cfg:nor + U, // cfg:u + I, // cfg:i + S, // cfg:s + NOR, // cfg:nor }; CFGService(Memory& mem) : mem(mem) {} diff --git a/include/services/csnd.hpp b/include/services/csnd.hpp index 8f6d60f8..93fa941d 100644 --- a/include/services/csnd.hpp +++ b/include/services/csnd.hpp @@ -10,6 +10,8 @@ class Kernel; class CSNDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::CSND; Memory& mem; Kernel& kernel; @@ -30,7 +32,5 @@ class CSNDService { void reset(); void handleSyncRequest(u32 messagePointer); - void setSharedMemory(u8* ptr) { - sharedMemory = ptr; - } + void setSharedMemory(u8* ptr) { sharedMemory = ptr; } }; \ No newline at end of file diff --git a/include/services/dlp_srvr.hpp b/include/services/dlp_srvr.hpp index 1e714283..ae9cc96f 100644 --- a/include/services/dlp_srvr.hpp +++ b/include/services/dlp_srvr.hpp @@ -8,6 +8,8 @@ // Please forgive me for how everything in this file is named // "dlp:SRVR" is not a nice name to work with class DlpSrvrService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DLP_SRVR; Memory& mem; MAKE_LOG_FUNCTION(log, dlpSrvrLogger) @@ -15,7 +17,7 @@ class DlpSrvrService { // Service commands void isChild(u32 messagePointer); -public: + public: DlpSrvrService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/dsp.hpp b/include/services/dsp.hpp index 5cbd4fd5..bc1adbca 100644 --- a/include/services/dsp.hpp +++ b/include/services/dsp.hpp @@ -14,6 +14,8 @@ class Kernel; class DSPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::DSP; Memory& mem; Kernel& kernel; diff --git a/include/services/fonts.hpp b/include/services/fonts.hpp new file mode 100644 index 00000000..9fa84be1 --- /dev/null +++ b/include/services/fonts.hpp @@ -0,0 +1,84 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h + +#pragma once + +#include + +#include "helpers.hpp" +#include "swap.hpp" + +namespace HLE::Fonts { + struct CFNT { + u8 magic[4]; + u16_le endianness; + u16_le headerSize; + u32_le version; + u32_le fileSize; + u32_le numBlocks; + }; + + struct SectionHeader { + u8 magic[4]; + u32_le sectionSize; + }; + + struct FINF { + u8 magic[4]; + u32_le sectionSize; + u8 fontType; + u8 lineFeed; + u16_le alterCharIndex; + u8 default_width[3]; + u8 encoding; + u32_le tglpOffset; + u32_le cwdhOffset; + u32_le cmapOffset; + u8 height; + u8 width; + u8 ascent; + u8 reserved; + }; + + struct TGLP { + u8 magic[4]; + u32_le sectionSize; + u8 cellWidth; + u8 cellHeight; + u8 baselinePosition; + u8 maxCharacterWidth; + u32_le sheetSize; + u16_le numSheets; + u16_le sheetImageFormat; + u16_le numColumns; + u16_le numRows; + u16_le sheetWidth; + u16_le sheetHeight; + u32_le sheetDataOffset; + }; + + struct CMAP { + u8 magic[4]; + u32_le sectionSize; + u16_le codeBegin; + u16_le codeEnd; + u16_le mappingMethod; + u16_le reserved; + u32_le nextCmapOffset; + }; + + struct CWDH { + u8 magic[4]; + u32_le sectionSize; + u16_le startIndex; + u16_le endIndex; + u32_le nextCwdhOffset; + }; + + // Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will + // be auto-detected based on the file headers. + void relocateSharedFont(u8* sharedFont, u32 newAddress); +} // namespace HLE::Fonts \ No newline at end of file diff --git a/include/services/frd.hpp b/include/services/frd.hpp index b9b3b0fe..914d9251 100644 --- a/include/services/frd.hpp +++ b/include/services/frd.hpp @@ -1,5 +1,6 @@ #pragma once #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -15,6 +16,8 @@ struct FriendKey { static_assert(sizeof(FriendKey) == 16); class FRDService { + using Handle = HorizonHandle; + Memory& mem; MAKE_LOG_FUNCTION(log, frdLogger) @@ -51,11 +54,11 @@ class FRDService { }; static_assert(sizeof(Profile) == 8); -public: + public: enum class Type { - A, // frd:a - N, // frd:n - U, // frd:u + A, // frd:a + N, // frd:n + U, // frd:u }; FRDService(Memory& mem) : mem(mem) {} diff --git a/include/services/fs.hpp b/include/services/fs.hpp index 4a613121..3b3b3d44 100644 --- a/include/services/fs.hpp +++ b/include/services/fs.hpp @@ -16,6 +16,8 @@ class Kernel; class FSService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::FS; Memory& mem; Kernel& kernel; @@ -81,7 +83,7 @@ class FSService { // Used for set/get priority: Not sure what sort of priority this is referring to u32 priority; -public: + public: FSService(Memory& mem, Kernel& kernel, const EmulatorConfig& config) : mem(mem), saveData(mem), sharedExtSaveData_nand(mem, "../SharedFiles/NAND", true), extSaveData_sdmc(mem, "SDMC"), sdmc(mem), sdmcWriteOnly(mem, true), selfNcch(mem), ncch(mem), userSaveData1(mem, ArchiveID::UserSaveData1), diff --git a/include/services/gsp_gpu.hpp b/include/services/gsp_gpu.hpp index 0da4fcd0..d7244609 100644 --- a/include/services/gsp_gpu.hpp +++ b/include/services/gsp_gpu.hpp @@ -22,6 +22,8 @@ enum class GPUInterrupt : u8 { class Kernel; class GPUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::GPU; Memory& mem; GPU& gpu; diff --git a/include/services/gsp_lcd.hpp b/include/services/gsp_lcd.hpp index e7672d4f..f34f59ab 100644 --- a/include/services/gsp_lcd.hpp +++ b/include/services/gsp_lcd.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class LCDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::LCD; Memory& mem; MAKE_LOG_FUNCTION(log, gspLCDLogger) diff --git a/include/services/hid.hpp b/include/services/hid.hpp index d9018a4f..a0eefb1c 100644 --- a/include/services/hid.hpp +++ b/include/services/hid.hpp @@ -38,6 +38,8 @@ namespace HID::Keys { class Kernel; class HIDService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HID; Memory& mem; Kernel& kernel; @@ -54,6 +56,7 @@ class HIDService { s16 circlePadX, circlePadY; // Circlepad state s16 touchScreenX, touchScreenY; // Touchscreen state s16 roll, pitch, yaw; // Gyroscope state + s16 accelX, accelY, accelZ; // Accelerometer state bool accelerometerEnabled; bool eventsInitialized; @@ -85,7 +88,14 @@ class HIDService { *(T*)&sharedMem[offset] = value; } + template + T* getSharedMemPointer(size_t offset) { + return (T*)&sharedMem[offset]; + } + public: + static constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS + HIDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); @@ -126,6 +136,12 @@ class HIDService { void setPitch(s16 value) { pitch = value; } void setYaw(s16 value) { yaw = value; } + void setAccel(s16 x, s16 y, s16 z) { + accelX = x; + accelY = y; + accelZ = z; + } + void updateInputs(u64 currentTimestamp); void setSharedMem(u8* ptr) { diff --git a/include/services/http.hpp b/include/services/http.hpp index 1e7f30c3..8b23fb2d 100644 --- a/include/services/http.hpp +++ b/include/services/http.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class HTTPService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::HTTP; Memory& mem; MAKE_LOG_FUNCTION(log, httpLogger) diff --git a/include/services/ir_user.hpp b/include/services/ir_user.hpp index 186d9717..d475bdaa 100644 --- a/include/services/ir_user.hpp +++ b/include/services/ir_user.hpp @@ -11,6 +11,8 @@ class Kernel; class IRUserService { + using Handle = HorizonHandle; + enum class DeviceID : u8 { CirclePadPro = 1, }; diff --git a/include/services/ldr_ro.hpp b/include/services/ldr_ro.hpp index 71516547..cf60e036 100644 --- a/include/services/ldr_ro.hpp +++ b/include/services/ldr_ro.hpp @@ -8,6 +8,8 @@ class Kernel; class LDRService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::LDR_RO; Memory& mem; Kernel& kernel; @@ -22,7 +24,7 @@ class LDRService { void loadCRR(u32 messagePointer); void unloadCRO(u32 messagePointer); -public: + public: LDRService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/mcu/mcu_hwc.hpp b/include/services/mcu/mcu_hwc.hpp index 354a0c20..4c6a8830 100644 --- a/include/services/mcu/mcu_hwc.hpp +++ b/include/services/mcu/mcu_hwc.hpp @@ -7,6 +7,8 @@ namespace MCU { class HWCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MCU_HWC; Memory& mem; MAKE_LOG_FUNCTION(log, mcuLogger) diff --git a/include/services/mic.hpp b/include/services/mic.hpp index f709c27f..f166c5aa 100644 --- a/include/services/mic.hpp +++ b/include/services/mic.hpp @@ -9,6 +9,8 @@ class Kernel; class MICService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::MIC; Memory& mem; Kernel& kernel; @@ -29,14 +31,14 @@ class MICService { void unmapSharedMem(u32 messagePointer); void theCaptainToadFunction(u32 messagePointer); - u8 gain = 0; // How loud our microphone input signal is + u8 gain = 0; // How loud our microphone input signal is bool micEnabled = false; bool shouldClamp = false; bool currentlySampling = false; std::optional eventHandle; -public: + public: MICService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ndm.hpp b/include/services/ndm.hpp index 6d4e5ad8..67679403 100644 --- a/include/services/ndm.hpp +++ b/include/services/ndm.hpp @@ -6,7 +6,14 @@ #include "result/result.hpp" class NDMService { - enum class ExclusiveState : u32 { None = 0, Infrastructure = 1, LocalComms = 2, StreetPass = 3, StreetPassData = 4 }; + using Handle = HorizonHandle; + enum class ExclusiveState : u32 { + None = 0, + Infrastructure = 1, + LocalComms = 2, + StreetPass = 3, + StreetPassData = 4, + }; Handle handle = KernelHandles::NDM; Memory& mem; @@ -25,7 +32,7 @@ class NDMService { ExclusiveState exclusiveState = ExclusiveState::None; -public: + public: NDMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/news_u.hpp b/include/services/news_u.hpp index 61266e9a..15ae0b16 100644 --- a/include/services/news_u.hpp +++ b/include/services/news_u.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class NewsUService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NEWS_U; Memory& mem; MAKE_LOG_FUNCTION(log, newsLogger) diff --git a/include/services/nfc.hpp b/include/services/nfc.hpp index 8eea8a41..e242a326 100644 --- a/include/services/nfc.hpp +++ b/include/services/nfc.hpp @@ -12,6 +12,8 @@ class Kernel; class NFCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NFC; Memory& mem; Kernel& kernel; diff --git a/include/services/nim.hpp b/include/services/nim.hpp index dfe13694..dbb3bb8b 100644 --- a/include/services/nim.hpp +++ b/include/services/nim.hpp @@ -6,6 +6,8 @@ #include "result/result.hpp" class NIMService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NIM; Memory& mem; MAKE_LOG_FUNCTION(log, nimLogger) @@ -13,7 +15,7 @@ class NIMService { // Service commands void initialize(u32 messagePointer); -public: + public: NIMService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/nwm_uds.hpp b/include/services/nwm_uds.hpp index bf116bcf..a3b342b8 100644 --- a/include/services/nwm_uds.hpp +++ b/include/services/nwm_uds.hpp @@ -10,6 +10,8 @@ class Kernel; class NwmUdsService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::NWM_UDS; Memory& mem; Kernel& kernel; diff --git a/include/services/ptm.hpp b/include/services/ptm.hpp index f752839b..5b797a1d 100644 --- a/include/services/ptm.hpp +++ b/include/services/ptm.hpp @@ -22,7 +22,7 @@ class PTMService { void getStepHistoryAll(u32 messagePointer); void getTotalStepCount(u32 messagePointer); -public: + public: enum class Type { U, // ptm:u SYSM, // ptm:sysm diff --git a/include/services/service_manager.hpp b/include/services/service_manager.hpp index 8d1cf381..4fa1e665 100644 --- a/include/services/service_manager.hpp +++ b/include/services/service_manager.hpp @@ -42,6 +42,8 @@ struct EmulatorConfig; class Kernel; class ServiceManager { + using Handle = HorizonHandle; + std::span regs; Memory& mem; Kernel& kernel; @@ -109,4 +111,5 @@ class ServiceManager { HIDService& getHID() { return hid; } NFCService& getNFC() { return nfc; } DSPService& getDSP() { return dsp; } + Y2RService& getY2R() { return y2r; } }; diff --git a/include/services/soc.hpp b/include/services/soc.hpp index 88f0b456..ff334a2c 100644 --- a/include/services/soc.hpp +++ b/include/services/soc.hpp @@ -5,6 +5,8 @@ #include "memory.hpp" class SOCService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SOC; Memory& mem; MAKE_LOG_FUNCTION(log, socLogger) @@ -14,7 +16,7 @@ class SOCService { // Service commands void initializeSockets(u32 messagePointer); -public: + public: SOCService(Memory& mem) : mem(mem) {} void reset(); void handleSyncRequest(u32 messagePointer); diff --git a/include/services/ssl.hpp b/include/services/ssl.hpp index 0282049a..4b45fc81 100644 --- a/include/services/ssl.hpp +++ b/include/services/ssl.hpp @@ -1,17 +1,19 @@ #pragma once +#include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" #include "memory.hpp" -#include - class SSLService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::SSL; Memory& mem; MAKE_LOG_FUNCTION(log, sslLogger) - std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() + std::mt19937 rng; // Use a Mersenne Twister for RNG since this service is supposed to have better rng than just rand() bool initialized; // Service commands diff --git a/include/services/y2r.hpp b/include/services/y2r.hpp index 0cc1d587..6afebdb8 100644 --- a/include/services/y2r.hpp +++ b/include/services/y2r.hpp @@ -1,6 +1,7 @@ #pragma once #include #include + #include "helpers.hpp" #include "kernel_types.hpp" #include "logger.hpp" @@ -10,6 +11,8 @@ class Kernel; class Y2RService { + using Handle = HorizonHandle; + Handle handle = KernelHandles::Y2R; Memory& mem; Kernel& kernel; @@ -20,7 +23,7 @@ class Y2RService { enum class BusyStatus : u32 { NotBusy = 0, - Busy = 1 + Busy = 1, }; enum class InputFormat : u32 { @@ -35,7 +38,7 @@ class Y2RService { RGB32 = 0, RGB24 = 1, RGB15 = 2, - RGB565 = 3 + RGB565 = 3, }; // Clockwise rotation @@ -43,12 +46,12 @@ class Y2RService { None = 0, Rotate90 = 1, Rotate180 = 2, - Rotate270 = 3 + Rotate270 = 3, }; enum class BlockAlignment : u32 { - Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. - Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. + Line = 0, // Output buffer's pixels are arranged linearly. Used when outputting to the framebuffer. + Block8x8 = 1, // Output buffer's pixels are morton swizzled. Used when outputting to a GPU texture. }; // https://github.com/citra-emu/citra/blob/ac9d72a95ca9a60de8d39484a14aecf489d6d016/src/core/hle/service/cam/y2r_u.cpp#L33 @@ -60,7 +63,7 @@ class Y2RService { {{0x12A, 0x1CA, 0x88, 0x36, 0x21C, -0x1F04, 0x99C, -0x2421}}, // ITU_Rec709_Scaling }}; - CoefficientSet conversionCoefficients; // Current conversion coefficients + CoefficientSet conversionCoefficients; // Current conversion coefficients InputFormat inputFmt; OutputFormat outputFmt; @@ -113,8 +116,12 @@ class Y2RService { void startConversion(u32 messagePointer); void stopConversion(u32 messagePointer); -public: + bool isBusy; + + public: Y2RService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {} void reset(); void handleSyncRequest(u32 messagePointer); + + void signalConversionDone(); }; \ No newline at end of file diff --git a/readme.md b/readme.md index 5f803bde..3a33fc71 100644 --- a/readme.md +++ b/readme.md @@ -1,5 +1,5 @@ # Panda3DS -[![Windows Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml) [![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml) [![Linux Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml) [![AUR Package](https://img.shields.io/aur/version/panda3ds-git)](https://aur.archlinux.org/packages/panda3ds-git) +[![Windows Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Windows_Build.yml) [![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml) [![Android Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml) [![Linux Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml) [![AUR Package](https://img.shields.io/aur/version/panda3ds-git)](https://aur.archlinux.org/packages/panda3ds-git) Panda3DS is an HLE, red-panda-themed Nintendo 3DS emulator written in C++ which started out as a fun project out of curiosity, but evolved into something that can sort of play games! @@ -10,7 +10,7 @@ Join our Discord server by pressing on the banner below, or find us on other pla [![Discord Banner 2](https://discord.com/api/guilds/1118695732958994532/widget.png?style=banner2)](https://discord.gg/ZYbugsEmsw) -![screenshot1](docs/img/KirbyRobobot.png) ![screenshot2](docs/img/OoT_Title.png) ![screenshot3](docs/img/pokegang.png) +![screenshot1](docs/img/KirbyRobobot.png) ![screenshot2](docs/img/OoT_Title.png) ![screenshot3](docs/img/pokegang.png) ![screenshot4](docs/img/KirbyAndroid.png) # Download You can download stable builds from the Releases tab, or you can download the latest build from the tables below. Additionally, Panda3DS comes in 2 flavours on PC: A minimal SDL frontend, which does not have a GUI, and an experimental Qt 6 frontend with a proper user interface. @@ -22,16 +22,16 @@ SDL builds (No GUI): |MacOS build|[![MacOS Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/MacOS_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/MacOS_Build/master/MacOS%20Alber%20App%20Bundle.zip)| |Linux build|[![Linux Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml/badge.svg?branch=master)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Linux_Build.yml)|[Linux AppImage](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Linux_AppImage_Build/master/Linux%20executable.zip)| -Qt builds: +Qt and Android builds: |Platform|Status|Download| |--------|------------|--------| |Windows build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[Windows Executable](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/Windows%20executable.zip)| |MacOS build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[MacOS App Bundle](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/MacOS%20Alber%20App%20Bundle.zip)| |Linux build|[![Qt Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Qt_Build.yml)|[Linux AppImage](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Qt_Build/master/Linux%20executable.zip)| - +|Android build (arm64)|[![Android Build](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml/badge.svg)](https://github.com/wheremyfoodat/Panda3DS/actions/workflows/Android_Build.yml)|[Android APK](https://nightly.link/wheremyfoodat/Panda3DS/workflows/Android_Build/master/Android%20APKs%20(arm64).zip)| # Compatibility -Panda3DS is still in the early stages of development. Many games boot, many don't. Lots of games have at least some hilariously broken graphics, audio is not supported, and some QoL features (including a GUI) are missing. However, even more things are implemented, such as most of the 3DS core required to play games, and various neat features, such as Lua scripting, discord bot support, support for some system apps, cheats, controller support, WIP amiibo support and many more! The emulator is constantly evolving, so make sure to take a peek every now and then! +Panda3DS is still in the early stages of development. Many games boot, many don't. Lots of games have at least some hilariously broken graphics, audio is WIP, and some QoL features are missing. However, even more things are implemented, such as most of the 3DS core required to play games, and various neat features, such as Lua scripting, discord bot support, support for some system apps, cheats, controller support, WIP amiibo support and many more! The emulator is constantly evolving, so make sure to take a peek every now and then! For documenting game compatibility, make sure to visit the [games list repository](https://github.com/Panda3DS-emu/Panda3DS-Games-List). For miscellaneous issues or more technical issues, feel free to use this repo's issues tab. # Why? @@ -116,7 +116,7 @@ Panda3DS also supports controller input using the SDL2 GameController API. - [MelonDS](https://github.com/melonDS-emu/melonDS): "DS emulator, sorta" - Arisotura - [Kaizen](https://github.com/SimoneN64/Kaizen): Experimental work-in-progress low-level N64 emulator - [ChonkyStation](https://github.com/liuk7071/ChonkyStation): Work-in-progress PlayStation emulator -- [shadPS4](https://github.com/georgemoralis/shadPS4): Work-in-progress PS4 emulator by the founder of PCSX, PCSX2 and more +- [shadPS4](https://github.com/shadps4-emu/shadPS4): Work-in-progress PS4 emulator by the founder of PCSX, PCSX2 and more - [Hydra](https://github.com/hydra-emu/hydra): Cross-platform GameBoy, NES, N64 and Chip-8 emulator # Support diff --git a/src/config.cpp b/src/config.cpp index 2f9b7e00..4cd18858 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -41,6 +41,23 @@ void EmulatorConfig::load() { discordRpcEnabled = toml::find_or(general, "EnableDiscordRPC", false); usePortableBuild = toml::find_or(general, "UsePortableBuild", false); defaultRomPath = toml::find_or(general, "DefaultRomPath", ""); + + printAppVersion = toml::find_or(general, "PrintAppVersion", true); + } + } + + if (data.contains("Window")) { + auto windowResult = toml::expect(data.at("Window")); + if (windowResult.is_ok()) { + auto window = windowResult.unwrap(); + + windowSettings.showAppVersion = toml::find_or(window, "AppVersionOnWindow", false); + windowSettings.rememberPosition = toml::find_or(window, "RememberWindowPosition", false); + + windowSettings.x = toml::find_or(window, "WindowPosX", WindowSettings::defaultX); + windowSettings.y = toml::find_or(window, "WindowPosY", WindowSettings::defaultY); + windowSettings.width = toml::find_or(window, "WindowWidth", WindowSettings::defaultWidth); + windowSettings.height = toml::find_or(window, "WindowHeight", WindowSettings::defaultHeight); } } @@ -62,6 +79,13 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); + accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); + accelerateShaders = toml::find_or(gpu, "AccelerateShaders", accelerateShadersDefault); + + forceShadergenForLights = toml::find_or(gpu, "ForceShadergenForLighting", true); + lightShadergenThreshold = toml::find_or(gpu, "ShadergenLightThreshold", 1); + enableRenderdoc = toml::find_or(gpu, "EnableRenderdoc", false); } } @@ -70,7 +94,7 @@ void EmulatorConfig::load() { if (audioResult.is_ok()) { auto audio = audioResult.unwrap(); - auto dspCoreName = toml::find_or(audio, "DSPEmulation", "Null"); + auto dspCoreName = toml::find_or(audio, "DSPEmulation", "HLE"); dspType = Audio::DSPCore::typeFromString(dspCoreName); audioEnabled = toml::find_or(audio, "EnableAudio", false); } @@ -122,9 +146,25 @@ void EmulatorConfig::save() { data["General"]["EnableDiscordRPC"] = discordRpcEnabled; data["General"]["UsePortableBuild"] = usePortableBuild; data["General"]["DefaultRomPath"] = defaultRomPath.string(); + data["General"]["PrintAppVersion"] = printAppVersion; + + data["Window"]["AppVersionOnWindow"] = windowSettings.showAppVersion; + data["Window"]["RememberWindowPosition"] = windowSettings.rememberPosition; + data["Window"]["WindowPosX"] = windowSettings.x; + data["Window"]["WindowPosY"] = windowSettings.y; + data["Window"]["WindowWidth"] = windowSettings.width; + data["Window"]["WindowHeight"] = windowSettings.height; + data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; + data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; + data["GPU"]["UseUbershaders"] = useUbershaders; + data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights; + data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold; + data["GPU"]["AccelerateShaders"] = accelerateShaders; + data["GPU"]["EnableRenderdoc"] = enableRenderdoc; + data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/draw_acceleration.cpp b/src/core/PICA/draw_acceleration.cpp new file mode 100644 index 00000000..d7df3b77 --- /dev/null +++ b/src/core/PICA/draw_acceleration.cpp @@ -0,0 +1,137 @@ +#include "PICA/draw_acceleration.hpp" + +#include +#include + +#include "PICA/gpu.hpp" +#include "PICA/pica_simd.hpp" +#include "PICA/regs.hpp" + +void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) { + accel.indexed = indexed; + accel.totalAttribCount = totalAttribCount; + accel.enabledAttributeMask = 0; + + const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16; + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer + + if (indexed) { + u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig]; + u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff); + + u8* indexBuffer = getPointerPhys(indexBufferPointer); + u16 minimumIndex = std::numeric_limits::max(); + u16 maximumIndex = 0; + + // Check whether the index buffer uses u16 indices or u8 + accel.useShortIndices = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit + + // Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them + if (accel.useShortIndices) { + std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze(indexBuffer, vertexCount); + } else { + std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze(indexBuffer, vertexCount); + } + + accel.indexBuffer = indexBuffer; + } else { + accel.indexBuffer = nullptr; + accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg]; + accel.maximumIndex = accel.minimumIndex + vertexCount - 1; + } + + const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32); + const u64 inputAttrCfg = getVertexShaderInputConfig(); + + u32 attrCount = 0; + u32 loaderOffset = 0; + accel.vertexDataSize = 0; + accel.totalLoaderCount = 0; + + for (int i = 0; i < PICA::DrawAcceleration::maxLoaderCount; i++) { + auto& loaderData = attributeInfo[i]; // Get information for this attribute loader + + // This loader is empty, skip it + if (loaderData.componentCount == 0 || loaderData.size == 0) { + continue; + } + + auto& loader = accel.loaders[accel.totalLoaderCount++]; + + // The size of the loader in bytes is equal to the bytes supplied for 1 vertex, multiplied by the number of vertices we'll be uploading + // Which is equal to maximumIndex - minimumIndex + 1 + const u32 bytes = loaderData.size * (accel.maximumIndex - accel.minimumIndex + 1); + loader.size = bytes; + + // Add it to the total vertex data size, aligned to 4 bytes. + accel.vertexDataSize += (bytes + 3) & ~3; + + // Get a pointer to the data where this loader's data is stored + const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size); + loader.data = getPointerPhys(loaderAddress); + + u64 attrCfg = loaderData.getConfigFull(); // Get config1 | (config2 << 32) + u32 attributeOffset = 0; + + for (int component = 0; component < loaderData.componentCount; component++) { + uint attributeIndex = (attrCfg >> (component * 4)) & 0xf; // Get index of attribute in vertexCfg + + // Vertex attributes used as padding + // 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively + if (attributeIndex >= 12) [[unlikely]] { + // Align attribute address up to a 4 byte boundary + attributeOffset = (attributeOffset + 3) & -4; + attributeOffset += (attributeIndex - 11) << 2; + continue; + } + + const u32 attribInfo = (vertexCfg >> (attributeIndex * 4)) & 0xf; + const u32 attribType = attribInfo & 0x3; // Type of attribute (sbyte/ubyte/short/float) + const u32 size = (attribInfo >> 2) + 1; // Total number of components + + // Size of each component based on the attribute type + static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4}; + const u32 inputReg = (inputAttrCfg >> (attributeIndex * 4)) & 0xf; + // Mark the attribute as enabled + accel.enabledAttributeMask |= 1 << inputReg; + + auto& attr = accel.attributeInfo[inputReg]; + attr.componentCount = size; + attr.offset = attributeOffset + loaderOffset; + attr.stride = loaderData.size; + attr.type = attribType; + attributeOffset += size * sizePerComponent[attribType]; + } + + loaderOffset += loader.size; + } + + u32 fixedAttributes = fixedAttribMask; + accel.fixedAttributes = 0; + + // Fetch values for all fixed attributes using CLZ on the fixed attribute mask to find the attributes that are actually fixed + while (fixedAttributes != 0) { + // Get index of next fixed attribute and turn it off + const u32 index = std::countr_zero(fixedAttributes); + const u32 mask = 1u << index; + fixedAttributes ^= mask; + + // PICA register this fixed attribute is meant to go to + const u32 inputReg = (inputAttrCfg >> (index * 4)) & 0xf; + const u32 inputRegMask = 1u << inputReg; + + // If this input reg is already used for a non-fixed attribute then it will not be replaced by a fixed attribute + if ((accel.enabledAttributeMask & inputRegMask) == 0) { + vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[index]; + auto& attr = accel.attributeInfo[inputReg]; + + accel.fixedAttributes |= inputRegMask; + + for (int i = 0; i < 4; i++) { + attr.fixedValue[i] = fixedAttr[i].toFloat32(); + } + } + } + + accel.canBeAccelerated = true; +} \ No newline at end of file diff --git a/src/core/PICA/dynapica/shader_rec.cpp b/src/core/PICA/dynapica/shader_rec.cpp index 20e171d7..e3c13c1e 100644 --- a/src/core/PICA/dynapica/shader_rec.cpp +++ b/src/core/PICA/dynapica/shader_rec.cpp @@ -16,7 +16,7 @@ void ShaderJIT::prepare(PICAShader& shaderUnit) { auto it = cache.find(hash); if (it == cache.end()) { // Block has not been compiled yet - auto emitter = std::make_unique(); + auto emitter = std::make_unique(accurateMul); emitter->compile(shaderUnit); // Get pointer to callbacks entrypointCallback = emitter->getInstructionCallback(shaderUnit.entrypoint); diff --git a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp index d6358070..296ec932 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp @@ -7,9 +7,6 @@ using namespace Helpers; using namespace oaknut; using namespace oaknut::util; -// TODO: Expose safe/unsafe optimizations to the user -constexpr bool useSafeMUL = true; - // Similar to the x64 recompiler, we use an odd internal ABI, which abuses the fact that we'll very rarely be calling C++ functions // So to avoid pushing and popping, we'll be making use of volatile registers as much as possible static constexpr QReg src1Vec = Q1; @@ -144,8 +141,8 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) { case ShaderOpcodes::CMP2: recCMP(shaderUnit, instruction); break; case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break; case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break; - // case ShaderOpcodes::DPH: - // case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break; case ShaderOpcodes::END: recEND(shaderUnit, instruction); break; case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break; case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break; @@ -491,7 +488,7 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { // Now do a full DP4 // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -518,7 +515,40 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); // Do a piecewise multiplication of the vectors first - if constexpr (useSafeMUL) { + if (useSafeMUL) { + emitSafeMUL(src1Vec, src2Vec, scratch1Vec); + } else { + FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); + } + FADDP(src1Vec.S4(), src1Vec.S4(), src1Vec.S4()); // Now add the adjacent components together + FADDP(src1Vec.toS(), src1Vec.toD().S2()); // Again for the bottom 2 lanes. Now the bottom lane contains the dot product + + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + DUP(src1Vec.S4(), src1Vec.Selem()[0]); // src1Vec = src1Vec.xxxx + } + + storeRegister(src1Vec, shader, dest, operandDescriptor); +} + +void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { + const bool isDPHI = (instruction >> 26) == ShaderOpcodes::DPHI; + + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const u32 src1 = isDPHI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2 = isDPHI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + const u32 writeMask = getBits<0, 4>(operandDescriptor); + + // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) + loadRegister<1>(src1Vec, shader, src1, isDPHI ? 0 : idx, operandDescriptor); + loadRegister<2>(src2Vec, shader, src2, isDPHI ? idx : 0, operandDescriptor); + // // Attach 1.0 to the w component of src1 + MOV(src1Vec.Selem()[3], onesVector.Selem()[0]); + + // Now perform a DP4 + // Do a piecewise multiplication of the vectors first + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -801,7 +831,7 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { loadRegister<1>(src1Vec, shader, src1, idx, operandDescriptor); loadRegister<2>(src2Vec, shader, src2, 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); } else { FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4()); @@ -874,7 +904,7 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2Vec, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3Vec, shader, src3, isMADI ? idx : 0, operandDescriptor); - if constexpr (useSafeMUL) { + if (useSafeMUL) { emitSafeMUL(src1Vec, src2Vec, scratch1Vec); FADD(src3Vec.S4(), src3Vec.S4(), src1Vec.S4()); } else { diff --git a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp index c134b72f..142ff8c8 100644 --- a/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp +++ b/src/core/PICA/dynapica/shader_rec_emitter_x64.cpp @@ -45,6 +45,16 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) { L(onesVector); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); dd(0x3f800000); // 1.0 4 times + if (useSafeMUL) { + // When doing safe mul, we need a vector to set only the w component to 0 for DP3 + L(dp3Vector); + + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0xFFFFFFFF); + dd(0); + } + // Emit prologue first align(16); prologueCb = getCurr(); @@ -523,24 +533,60 @@ void ShaderEmitter::recDP3(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b01111111); // 3-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + dpps(src1_xmm, src2_xmm, 0b01111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set w component to 0 and do a DP4 + andps(src1_xmm, xword[rip + dp3Vector]); + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) { const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; const u32 src1 = getBits<12, 7>(instruction); - const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment + const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -553,7 +599,6 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, isDPHI ? 0 : idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, isDPHI ? idx : 0, operandDescriptor); @@ -566,7 +611,25 @@ void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) { unpcklpd(src1_xmm, scratch1); } - dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + // Now perform a DP4 + if (!useSafeMUL) { + // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA + dpps(src1_xmm, src2_xmm, 0b11111111); + } else { + const u32 writeMask = operandDescriptor & 0xf; + + // Set src1 to src1 * src2, then get the dot product by doing 2 horizontal adds + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + haddps(src1_xmm, src1_xmm); + haddps(src1_xmm, src1_xmm); + + // If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx + // Otherwise we do + if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x + shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx + } + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -603,10 +666,15 @@ void ShaderEmitter::recMUL(const PICAShader& shader, u32 instruction) { const u32 idx = getBits<19, 2>(instruction); const u32 dest = getBits<21, 5>(instruction); - // TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA) loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor); loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor); - mulps(src1_xmm, src2_xmm); + + if (!useSafeMUL) { + mulps(src1_xmm, src2_xmm); + } else { + emitSafeMUL(src1_xmm, src2_xmm, scratch1); + } + storeRegister(src1_xmm, shader, dest, operandDescriptor); } @@ -662,23 +730,31 @@ void ShaderEmitter::recMAD(const PICAShader& shader, u32 instruction) { loadRegister<2>(src2_xmm, shader, src2, isMADI ? 0 : idx, operandDescriptor); loadRegister<3>(src3_xmm, shader, src3, isMADI ? idx : 0, operandDescriptor); - // TODO: Implement safe PICA mul // If we have FMA3, optimize MAD to use FMA - if (haveFMA3) { - vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); - storeRegister(src1_xmm, shader, dest, operandDescriptor); - } - - // If we don't have FMA3, do a multiplication and addition - else { - // Multiply src1 * src2 - if (haveAVX) { - vmulps(scratch1, src1_xmm, src2_xmm); - } else { - movaps(scratch1, src1_xmm); - mulps(scratch1, src2_xmm); + if (!useSafeMUL) { + if (haveFMA3) { + vfmadd213ps(src1_xmm, src2_xmm, src3_xmm); + storeRegister(src1_xmm, shader, dest, operandDescriptor); } + // If we don't have FMA3, do a multiplication and addition + else { + // Multiply src1 * src2 + if (haveAVX) { + vmulps(scratch1, src1_xmm, src2_xmm); + } else { + movaps(scratch1, src1_xmm); + mulps(scratch1, src2_xmm); + } + + // Add src3 + addps(scratch1, src3_xmm); + storeRegister(scratch1, shader, dest, operandDescriptor); + } + } else { + movaps(scratch1, src1_xmm); + emitSafeMUL(scratch1, src2_xmm, src1_xmm); + // Add src3 addps(scratch1, src3_xmm); storeRegister(scratch1, shader, dest, operandDescriptor); @@ -1115,6 +1191,41 @@ Xbyak::Label ShaderEmitter::emitLog2Func() { return subroutine; } +void ShaderEmitter::emitSafeMUL(Xmm src1, Xmm src2, Xmm scratch) { + // 0 * inf and inf * 0 in the PICA should return 0 instead of NaN + // This can be done by checking for NaNs before and after a multiplication + // To do this we can create a mask of which components of src1/src2 are NOT NaN using cmpordsps (cmpps with imm = 7) + // Then we multiply src1 and src2 and reate a mask of which components of the result ARE NaN using cmpunordps + // If the NaNs didn't exist (ie they were created by 0 * inf) before then we set them to 0 by XORing the 2 masks and ANDing the multiplication + // result with the xor result + // Based on Citra implementation, particularly the AVX-512 version + + if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) { + const Xbyak::Opmask zeroMask = k1; + + vmulps(scratch, src1, src2); + // Mask of any NaN values found in the result + vcmpunordps(zeroMask, scratch, scratch); + // Mask of any non-NaN inputs producing NaN results + vcmpordps(zeroMask | zeroMask, src1, src2); + + knotb(zeroMask, zeroMask); + vmovaps(src1 | zeroMask | T_z, scratch); + } else { + if (haveAVX) { + vcmpordps(scratch, src1, src2); + } else { + movaps(scratch, src1); + cmpordps(scratch, src2); + } + + mulps(src1, src2); + cmpunordps(src2, src1); + xorps(src2, scratch); + andps(src1, src2); + } +} + Xbyak::Label ShaderEmitter::emitExp2Func() { Xbyak::Label subroutine; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index a777d0a3..838d3fb3 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -15,6 +15,9 @@ #ifdef PANDA3DS_ENABLE_VULKAN #include "renderer_vk/renderer_vk.hpp" #endif +#ifdef PANDA3DS_ENABLE_METAL +#include "renderer_mtl/renderer_mtl.hpp" +#endif constexpr u32 topScreenWidth = 240; constexpr u32 topScreenHeight = 400; @@ -52,22 +55,37 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) { renderer.reset(new RendererVK(*this, regs, externalRegs)); break; } +#endif +#ifdef PANDA3DS_ENABLE_METAL + case RendererType::Metal: { + renderer.reset(new RendererMTL(*this, regs, externalRegs)); + break; + } #endif default: { Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType)); break; } } + + if (renderer != nullptr) { + renderer->setConfig(&config); + } } void GPU::reset() { regs.fill(0); shaderUnit.reset(); shaderJIT.reset(); + shaderJIT.setAccurateMul(config.accurateShaderMul); + std::memset(vram, 0, vramSize); lightingLUT.fill(0); lightingLUTDirty = true; + fogLUT.fill(0); + fogLUTDirty = true; + totalAttribCount = 0; fixedAttribMask = 0; fixedAttribIndex = 0; @@ -111,33 +129,59 @@ void GPU::reset() { renderer->reset(); } +static std::array vertices; + // Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter) // And whether we are going to use the shader JIT (second template parameter) void GPU::drawArrays(bool indexed) { - const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; + PICA::DrawAcceleration accel; - if (indexed) { - if (shaderJITEnabled) - drawArrays(); - else - drawArrays(); + if (config.accelerateShaders) { + // If we are potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU + // This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on + getAcceleratedDrawInfo(accel, indexed); + } + + const bool hwShaders = renderer->prepareForDraw(shaderUnit, &accel); + + if (hwShaders) { + // Hardware shaders have their own accelerated code path for draws, so they skip everything here + const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(regs[PICA::InternalRegs::PrimitiveConfig])); + // Total # of vertices to render + const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; + + // Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching + renderer->drawVertices(primType, std::span(vertices).first(vertexCount)); } else { - if (shaderJITEnabled) - drawArrays(); - else - drawArrays(); + const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled; + + if (indexed) { + if (shaderJITEnabled) { + drawArrays(); + } else { + drawArrays(); + } + } else { + if (shaderJITEnabled) { + drawArrays(); + } else { + drawArrays(); + } + } } } -static std::array vertices; - -template +template void GPU::drawArrays() { - if constexpr (useShaderJIT) { + if constexpr (mode == ShaderExecMode::JIT) { shaderJIT.prepare(shaderUnit.vs); + } else if constexpr (mode == ShaderExecMode::Hardware) { + // Hardware shaders have their own accelerated code path for draws, so they're not meant to take this path + Helpers::panic("GPU::DrawArrays: Hardware shaders shouldn't take this path!"); } - setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]); + // We can have up to 16 attributes, each one consisting of 4 floats + constexpr u32 maxAttrSizeInFloats = 16 * 4; // Base address for vertex attributes // The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible @@ -147,7 +191,10 @@ void GPU::drawArrays() { // Configures the type of primitive and the number of vertex shader outputs const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig]; const PICA::PrimType primType = static_cast(Helpers::getBits<8, 2>(primConfig)); - if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize"); + if (vertexCount > Renderer::vertexBufferSize) [[unlikely]] { + Helpers::warn("[PICA] vertexCount > vertexBufferSize"); + return; + } if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) || (primType == PICA::PrimType::TriangleFan && vertexCount < 3)) { @@ -299,8 +346,6 @@ void GPU::drawArrays() { } // Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0) for everything else - // Corgi does this although I'm not sure if it's actually needed for anything. - // TODO: Find out while (component < 4) { attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0); component++; @@ -314,13 +359,13 @@ void GPU::drawArrays() { // Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers // Based on the SH_ATTRIBUTES_PERMUTATION registers. - // Ie it might attribute #0 to v2, #1 to v7, etc + // Ie it might map attribute #0 to v2, #1 to v7, etc for (int j = 0; j < totalAttribCount; j++) { const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf; std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f)); } - if constexpr (useShaderJIT) { + if constexpr (mode == ShaderExecMode::JIT) { shaderJIT.run(shaderUnit.vs); } else { shaderUnit.vs.run(); @@ -355,7 +400,7 @@ PICA::Vertex GPU::getImmediateModeVertex() { // Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute shaderUnit.vs.run(); - + // Map shader outputs to fixed function properties const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; for (int i = 0; i < totalShaderOutputs; i++) { diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index baaa2256..4c865d12 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -135,6 +135,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + case FogLUTData0: + case FogLUTData1: + case FogLUTData2: + case FogLUTData3: + case FogLUTData4: + case FogLUTData5: + case FogLUTData6: + case FogLUTData7: { + const uint32_t index = regs[FogLUTIndex] & 0x7F; + fogLUT[index] = value; + fogLUTDirty = true; + regs[FogLUTIndex] = (index + 1) & 0x7F; + break; + } + case LightingLUTData0: case LightingLUTData1: case LightingLUTData2: @@ -234,6 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { // If we've reached 3 verts, issue a draw call // Handle rendering depending on the primitive type if (immediateModeVertIndex == 3) { + renderer->prepareForDraw(shaderUnit, nullptr); renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices); switch (primType) { @@ -285,7 +301,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { } case VertexBoolUniform: { - shaderUnit.vs.boolUniform = value & 0xffff; + shaderUnit.vs.uploadBoolUniform(value & 0xffff); break; } @@ -314,9 +330,11 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { break; } + /* TODO: Find out if this actually does anything case VertexShaderTransferEnd: if (value != 0) shaderUnit.vs.finalize(); break; + */ case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break; diff --git a/src/core/PICA/shader_decompiler.cpp b/src/core/PICA/shader_decompiler.cpp new file mode 100644 index 00000000..467c4727 --- /dev/null +++ b/src/core/PICA/shader_decompiler.cpp @@ -0,0 +1,832 @@ +#include "PICA/shader_decompiler.hpp" + +#include + +#include +#include + +#include "config.hpp" + +using namespace PICA; +using namespace PICA::ShaderGen; +using namespace Helpers; + +using Function = ControlFlow::Function; +using ExitMode = Function::ExitMode; + +void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) { + analysisFailed = false; + + const Function* function = addFunction(shader, entrypoint, PICAShader::maxInstructionCount); + if (function == nullptr || function->exitMode != ExitMode::AlwaysEnd) { + analysisFailed = true; + } +} + +// Helpers for merging parallel/series exit methods from Citra +// Merges exit method of two parallel branches. +static ExitMode exitParallel(ExitMode a, ExitMode b) { + if (a == ExitMode::Unknown) { + return b; + } + else if (b == ExitMode::Unknown) { + return a; + } + else if (a == b) { + return a; + } + return ExitMode::Conditional; +} + +// Cascades exit method of two blocks of code. +static ExitMode exitSeries(ExitMode a, ExitMode b) { + assert(a != ExitMode::AlwaysEnd); + + if (a == ExitMode::Unknown) { + return ExitMode::Unknown; + } + + if (a == ExitMode::AlwaysReturn) { + return b; + } + + if (b == ExitMode::Unknown || b == ExitMode::AlwaysEnd) { + return ExitMode::AlwaysEnd; + } + + return ExitMode::Conditional; +} + +ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels) { + // Initialize exit mode to unknown by default, in order to detect things like unending loops + auto [it, inserted] = exitMap.emplace(AddressRange(start, end), ExitMode::Unknown); + // Function has already been analyzed and is in the map so it wasn't added, don't analyze again + if (!inserted) { + return it->second; + } + + // Make sure not to go out of bounds on the shader + for (u32 pc = start; pc < PICAShader::maxInstructionCount && pc != end; pc++) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + switch (opcode) { + case ShaderOpcodes::JMPC: + case ShaderOpcodes::JMPU: { + const u32 dest = getBits<10, 12>(instruction); + // Register this jump address to our outLabels set + labels.insert(dest); + + // This opens up 2 parallel paths of execution + auto branchTakenExit = analyzeFunction(shader, dest, end, labels); + auto branchNotTakenExit = analyzeFunction(shader, pc + 1, end, labels); + it->second = exitParallel(branchTakenExit, branchNotTakenExit); + return it->second; + } + + case ShaderOpcodes::IFU: + case ShaderOpcodes::IFC: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + + const Function* branchTakenFunc = addFunction(shader, pc + 1, dest); + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + // Next analyze the not taken func + ExitMode branchNotTakenExitMode = ExitMode::AlwaysReturn; + if (num != 0) { + const Function* branchNotTakenFunc = addFunction(shader, dest, dest + num); + // Check if analysis failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + branchNotTakenExitMode = branchNotTakenFunc->exitMode; + } + + auto parallel = exitParallel(branchTakenFunc->exitMode, branchNotTakenExitMode); + // Both branches of the if/else end, so there's nothing after the call + if (parallel == ExitMode::AlwaysEnd) { + it->second = parallel; + return it->second; + } else { + ExitMode afterConditional = analyzeFunction(shader, dest + num, end, labels); + ExitMode conditionalExitMode = exitSeries(parallel, afterConditional); + it->second = conditionalExitMode; + return it->second; + } + break; + } + + case ShaderOpcodes::CALL: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunction = addFunction(shader, dest, dest + num); + + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + if (calledFunction->exitMode == ExitMode::AlwaysEnd) { + it->second = ExitMode::AlwaysEnd; + return it->second; + } + + // Exit mode of the remainder of this function, after we return from the callee + const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels); + const ExitMode exitMode = exitSeries(calledFunction->exitMode, postCallExitMode); + + it->second = exitMode; + return exitMode; + } + + case ShaderOpcodes::CALLC: + case ShaderOpcodes::CALLU: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunction = addFunction(shader, dest, dest + num); + + // Check if analysis of the branch taken func failed and return unknown if it did + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + // Exit mode of the remainder of this function, after we return from the callee + const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels); + const ExitMode exitMode = exitSeries(exitParallel(calledFunction->exitMode, ExitMode::AlwaysReturn), postCallExitMode); + + it->second = exitMode; + return exitMode; + } + + case ShaderOpcodes::LOOP: { + u32 dest = getBits<10, 12>(instruction); + const Function* loopFunction = addFunction(shader, pc + 1, dest + 1); + if (analysisFailed) { + it->second = ExitMode::Unknown; + return it->second; + } + + if (loopFunction->exitMode == ExitMode::AlwaysEnd) { + it->second = ExitMode::AlwaysEnd; + return it->second; + } + + const ExitMode afterLoop = analyzeFunction(shader, dest + 1, end, labels); + const ExitMode exitMode = exitSeries(loopFunction->exitMode, afterLoop); + it->second = exitMode; + return it->second; + } + + case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second; + default: break; + } + } + + // A function without control flow instructions will always reach its "return point" and return + it->second = ExitMode::AlwaysReturn; + return it->second; +} + +std::pair ShaderDecompiler::compileRange(const AddressRange& range) { + u32 pc = range.start; + const u32 end = range.end >= range.start ? range.end : PICAShader::maxInstructionCount; + bool finished = false; + + while (pc < end && !finished) { + compileInstruction(pc, finished); + } + + return std::make_pair(pc, finished); +} + +const Function* ShaderDecompiler::findFunction(const AddressRange& range) { + for (const Function& func : controlFlow.functions) { + if (range.start == func.start && range.end == func.end) { + return &func; + } + } + + return nullptr; +} + +void ShaderDecompiler::writeAttributes() { + // Annoyingly, GLES does not support having an array as an input attribute, so declare each attribute separately for now + decompiledShader += R"( + layout(location = 0) in vec4 attr0; + layout(location = 1) in vec4 attr1; + layout(location = 2) in vec4 attr2; + layout(location = 3) in vec4 attr3; + layout(location = 4) in vec4 attr4; + layout(location = 5) in vec4 attr5; + layout(location = 6) in vec4 attr6; + layout(location = 7) in vec4 attr7; + layout(location = 8) in vec4 attr8; + layout(location = 9) in vec4 attr9; + layout(location = 10) in vec4 attr10; + layout(location = 11) in vec4 attr11; + layout(location = 12) in vec4 attr12; + layout(location = 13) in vec4 attr13; + layout(location = 14) in vec4 attr14; + layout(location = 15) in vec4 attr15; + + layout(std140) uniform PICAShaderUniforms { + vec4 uniform_f[96]; + uvec4 uniform_i; + uint uniform_bool; + }; + + vec4 temp[16]; + vec4 out_regs[16]; + vec4 dummy_vec = vec4(0.0); + ivec3 addr_reg = ivec3(0); + bvec2 cmp_reg = bvec2(false); + + vec4 uniform_indexed(int source, int offset) { + int clipped_offs = (offset >= -128 && offset <= 127) ? offset : 0; + uint index = uint(clipped_offs + source) & 127u; + return (index < 96u) ? uniform_f[index] : vec4(1.0); + } +)"; +} + +std::string ShaderDecompiler::decompile() { + controlFlow.analyze(shader, entrypoint); + + if (controlFlow.analysisFailed) { + return ""; + } + + compilationError = false; + decompiledShader.clear(); + // Reserve some memory for the shader string to avoid memory allocations + decompiledShader.reserve(256 * 1024); + + switch (api) { + case API::GL: decompiledShader += "#version 410 core\n"; break; + case API::GLES: decompiledShader += "#version 300 es\nprecision mediump float;\nprecision mediump int;\n"; break; + default: break; + } + + writeAttributes(); + + if (config.accurateShaderMul) { + // Safe multiplication handler from Citra: Handles the PICA's 0 * inf = 0 edge case + decompiledShader += R"( + vec4 safe_mul(vec4 a, vec4 b) { + vec4 res = a * b; + return mix(res, mix(mix(vec4(0.0), res, isnan(b)), res, isnan(a)), isnan(res)); + } + )"; + } + + // Forward declare every generated function first so that we can easily call anything from anywhere. + for (auto& func : controlFlow.functions) { + decompiledShader += func.getForwardDecl(); + } + + decompiledShader += "void pica_shader_main() {\n"; + AddressRange mainFunctionRange(entrypoint, PICAShader::maxInstructionCount); + auto mainFunc = findFunction(mainFunctionRange); + + decompiledShader += mainFunc->getCallStatement() + ";\n}\n"; + + for (const Function& func : controlFlow.functions) { + if (func.outLabels.empty()) { + decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier()); + + auto [pc, finished] = compileRange(AddressRange(func.start, func.end)); + if (!finished) { + decompiledShader += "return false;"; + } + + decompiledShader += "}\n"; + } else { + auto labels = func.outLabels; + labels.insert(func.start); + + // If a function has jumps and "labels", this needs to be emulated using a switch-case, with the variable being switched on being the + // current PC + decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier()); + decompiledShader += fmt::format("uint pc = {}u;\n", func.start); + decompiledShader += "while(true){\nswitch(pc){\n"; + + for (u32 label : labels) { + decompiledShader += fmt::format("case {}u: {{", label); + // Fetch the next label whose address > label + auto it = labels.lower_bound(label + 1); + u32 next = (it == labels.end()) ? func.end : *it; + + auto [endPC, finished] = compileRange(AddressRange(label, next)); + if (endPC > next && !finished) { + labels.insert(endPC); + decompiledShader += fmt::format("pc = {}u; break;", endPC); + } + + // Fallthrough to next label + decompiledShader += "}\n"; + } + + decompiledShader += "default: return false;\n"; + // Exit the switch and loop + decompiledShader += "} }\n"; + + // Exit the function + decompiledShader += "return false;\n"; + decompiledShader += "}\n"; + } + } + + // We allow some leeway for "compilation errors" in addition to control flow errors, in cases where eg an unimplemented instruction + // or an instruction that we can't emulate in GLSL is found in the instruction stream. Just like control flow errors, these return an empty string + // and the renderer core will decide to use CPU shaders instead + if (compilationError) [[unlikely]] { + return ""; + } + + return decompiledShader; +} + +std::string ShaderDecompiler::getSource(u32 source, [[maybe_unused]] u32 index) const { + if (source < 0x10) { + return "attr" + std::to_string(source); + } else if (source < 0x20) { + return "temp[" + std::to_string(source - 0x10) + "]"; + } else { + const usize floatIndex = (source - 0x20) & 0x7f; + + if (index == 0) { + if (floatIndex >= 96) [[unlikely]] { + return "dummy_vec"; + } + return "uniform_f[" + std::to_string(floatIndex) + "]"; + } else { + static constexpr std::array offsets = {"0", "addr_reg.x", "addr_reg.y", "addr_reg.z"}; + return fmt::format("uniform_indexed({}, {})", floatIndex, offsets[index]); + } + } +} + +std::string ShaderDecompiler::getDest(u32 dest) const { + if (dest < 0x10) { + return "out_regs[" + std::to_string(dest) + "]"; + } else if (dest < 0x20) { + return "temp[" + std::to_string(dest - 0x10) + "]"; + } else { + return "dummy_vec"; + } +} + +std::string ShaderDecompiler::getSwizzlePattern(u32 swizzle) const { + // If the swizzle field is this value then the swizzle pattern is .xyzw so we don't need a shuffle + static constexpr uint noSwizzle = 0x1B; + if (swizzle == noSwizzle) { + return ""; + } + + static constexpr std::array names = {'x', 'y', 'z', 'w'}; + std::string ret(". "); + + for (int i = 0; i < 4; i++) { + ret[3 - i + 1] = names[swizzle & 0x3]; + swizzle >>= 2; + } + + return ret; +} + +std::string ShaderDecompiler::getDestSwizzle(u32 destinationMask) const { + std::string ret = "."; + if (destinationMask & 0b1000) { + ret += "x"; + } + + if (destinationMask & 0b100) { + ret += "y"; + } + + if (destinationMask & 0b10) { + ret += "z"; + } + + if (destinationMask & 0b1) { + ret += "w"; + } + + return ret; +} + +void ShaderDecompiler::setDest(u32 operandDescriptor, const std::string& dest, const std::string& value) { + u32 destinationMask = operandDescriptor & 0xF; + + std::string destSwizzle = getDestSwizzle(destinationMask); + // We subtract 1 for the "." character of the swizzle descriptor + u32 writtenLaneCount = destSwizzle.size() - 1; + + // All lanes are masked out, so the operation is a nop. + if (writtenLaneCount == 0) { + return; + } + + // Don't write destination swizzle if all lanes are getting written to + decompiledShader += fmt::format("{}{} = ", dest, writtenLaneCount == 4 ? "" : destSwizzle); + if (writtenLaneCount <= 3) { + decompiledShader += fmt::format("({}){};\n", value, destSwizzle); + } else if (writtenLaneCount == 4) { + decompiledShader += fmt::format("{};\n", value); + } +} + +void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) { + const u32 instruction = shader.loadedShader[pc]; + const u32 opcode = instruction >> 26; + + if (usesCommonEncoding(instruction)) { + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f]; + const bool invertSources = (opcode == ShaderOpcodes::SLTI || opcode == ShaderOpcodes::SGEI || opcode == ShaderOpcodes::DPHI); + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = invertSources ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction); + const u32 src2Index = invertSources ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction); + + const u32 idx = getBits<19, 2>(instruction); + const u32 destIndex = getBits<21, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, invertSources ? 0 : idx); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, invertSources ? idx : 0); + src2 += getSwizzlePattern(swizzle2); + + std::string dest = getDest(destIndex); + + switch (opcode) { + case ShaderOpcodes::MOV: setDest(operandDescriptor, dest, src1); break; + case ShaderOpcodes::ADD: setDest(operandDescriptor, dest, fmt::format("{} + {}", src1, src2)); break; + case ShaderOpcodes::MUL: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("{} * {}", src1, src2)); + } else { + setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {})", src1, src2)); + } + break; + case ShaderOpcodes::MAX: setDest(operandDescriptor, dest, fmt::format("max({}, {})", src1, src2)); break; + case ShaderOpcodes::MIN: setDest(operandDescriptor, dest, fmt::format("min({}, {})", src1, src2)); break; + + case ShaderOpcodes::DP3: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("vec4(dot({}.xyz, {}.xyz))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec3(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}).xyz, vec3(1.0)))", src1, src2)); + } + break; + case ShaderOpcodes::DP4: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("vec4(dot({}, {}))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}), vec4(1.0)))", src1, src2)); + } + break; + case ShaderOpcodes::FLR: setDest(operandDescriptor, dest, fmt::format("floor({})", src1)); break; + case ShaderOpcodes::RSQ: setDest(operandDescriptor, dest, fmt::format("vec4(inversesqrt({}.x))", src1)); break; + case ShaderOpcodes::RCP: setDest(operandDescriptor, dest, fmt::format("vec4(1.0 / {}.x)", src1)); break; + case ShaderOpcodes::LG2: setDest(operandDescriptor, dest, fmt::format("vec4(log2({}.x))", src1)); break; + case ShaderOpcodes::EX2: setDest(operandDescriptor, dest, fmt::format("vec4(exp2({}.x))", src1)); break; + + case ShaderOpcodes::SLT: + case ShaderOpcodes::SLTI: setDest(operandDescriptor, dest, fmt::format("vec4(lessThan({}, {}))", src1, src2)); break; + + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: setDest(operandDescriptor, dest, fmt::format("vec4(greaterThanEqual({}, {}))", src1, src2)); break; + + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("vec4(dot(vec4({}.xyz, 1.0), {}))", src1, src2)); + } else { + // A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0) + setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul(vec4({}.xyz, 1.0), {}), vec4(1.0)))", src1, src2)); + } + break; + + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: { + static constexpr std::array operators = { + // The last 2 operators always return true and are handled specially + "==", "!=", "<", "<=", ">", ">=", "", "", + }; + + const u32 cmpY = getBits<21, 3>(instruction); + const u32 cmpX = getBits<24, 3>(instruction); + + // Compare x first + if (cmpX >= 6) { + decompiledShader += "cmp_reg.x = true;\n"; + } else { + decompiledShader += fmt::format("cmp_reg.x = {}.x {} {}.x;\n", src1, operators[cmpX], src2); + } + + // Then compare Y + if (cmpY >= 6) { + decompiledShader += "cmp_reg.y = true;\n"; + } else { + decompiledShader += fmt::format("cmp_reg.y = {}.y {} {}.y;\n", src1, operators[cmpY], src2); + } + break; + } + + case ShaderOpcodes::MOVA: { + const bool writeX = getBit<3>(operandDescriptor); // Should we write the x component of the address register? + const bool writeY = getBit<2>(operandDescriptor); + + if (writeX && writeY) { + decompiledShader += fmt::format("addr_reg.xy = ivec2({}.xy);\n", src1); + } else if (writeX) { + decompiledShader += fmt::format("addr_reg.x = int({}.x);\n", src1); + } else if (writeY) { + decompiledShader += fmt::format("addr_reg.y = int({}.y);\n", src1); + } + break; + } + + default: + Helpers::warn("GLSL recompiler: Unknown common opcode: %02X. Falling back to CPU shaders", opcode); + compilationError = true; + break; + } + } else if (opcode >= 0x30 && opcode <= 0x3F) { // MAD and MADI + const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f]; + const bool isMADI = getBit<29>(instruction) == 0; // We detect MADI based on bit 29 of the instruction + + // src1 and src2 indexes depend on whether this is one of the inverting instructions or not + const u32 src1Index = getBits<17, 5>(instruction); + const u32 src2Index = isMADI ? getBits<12, 5>(instruction) : getBits<10, 7>(instruction); + const u32 src3Index = isMADI ? getBits<5, 7>(instruction) : getBits<5, 5>(instruction); + const u32 idx = getBits<22, 2>(instruction); + const u32 destIndex = getBits<24, 5>(instruction); + + const bool negate1 = (getBit<4>(operandDescriptor)) != 0; + const u32 swizzle1 = getBits<5, 8>(operandDescriptor); + const bool negate2 = (getBit<13>(operandDescriptor)) != 0; + const u32 swizzle2 = getBits<14, 8>(operandDescriptor); + + const bool negate3 = (getBit<22>(operandDescriptor)) != 0; + const u32 swizzle3 = getBits<23, 8>(operandDescriptor); + + std::string src1 = negate1 ? "-" : ""; + src1 += getSource(src1Index, 0); + src1 += getSwizzlePattern(swizzle1); + + std::string src2 = negate2 ? "-" : ""; + src2 += getSource(src2Index, isMADI ? 0 : idx); + src2 += getSwizzlePattern(swizzle2); + + std::string src3 = negate3 ? "-" : ""; + src3 += getSource(src3Index, isMADI ? idx : 0); + src3 += getSwizzlePattern(swizzle3); + + std::string dest = getDest(destIndex); + if (!config.accurateShaderMul) { + setDest(operandDescriptor, dest, fmt::format("{} * {} + {}", src1, src2, src3)); + } else { + setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {}) + {}", src1, src2, src3)); + } + } else { + switch (opcode) { + case ShaderOpcodes::JMPC: { + const u32 dest = getBits<10, 12>(instruction); + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{ pc = {}u; break; }}\n", condition, dest); + break; + } + + case ShaderOpcodes::JMPU: { + const u32 dest = getBits<10, 12>(instruction); + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we jump if bit = 1, otherwise 0 + + decompiledShader += fmt::format("if ((uniform_bool & {}u) {} 0u) {{ pc = {}u; break; }}\n", mask, (test != 0) ? "!=" : "==", dest); + break; + } + + case ShaderOpcodes::IFU: + case ShaderOpcodes::IFC: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* conditionalFunc = findFunction(AddressRange(pc + 1, dest)); + + if (opcode == ShaderOpcodes::IFC) { + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{", condition); + } else { + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + + decompiledShader += fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask); + } + + callFunction(*conditionalFunc); + decompiledShader += "}\n"; + + pc = dest; + if (num > 0) { + const Function* elseFunc = findFunction(AddressRange(dest, dest + num)); + pc = dest + num; + + decompiledShader += "else { "; + callFunction(*elseFunc); + decompiledShader += "}\n"; + + if (conditionalFunc->exitMode == ExitMode::AlwaysEnd && elseFunc->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + } + + return; + } + + case ShaderOpcodes::CALL: + case ShaderOpcodes::CALLC: + case ShaderOpcodes::CALLU: { + const u32 num = instruction & 0xff; + const u32 dest = getBits<10, 12>(instruction); + const Function* calledFunc = findFunction(AddressRange(dest, dest + num)); + + // Handle conditions for CALLC/CALLU + if (opcode == ShaderOpcodes::CALLC) { + const u32 condOp = getBits<22, 2>(instruction); + const uint refY = getBit<24>(instruction); + const uint refX = getBit<25>(instruction); + const char* condition = getCondition(condOp, refX, refY); + + decompiledShader += fmt::format("if ({}) {{", condition); + } else if (opcode == ShaderOpcodes::CALLU) { + const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check + const u32 mask = 1u << bit; + + decompiledShader += fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask); + } + + callFunction(*calledFunc); + + // Close brackets for CALLC/CALLU + if (opcode != ShaderOpcodes::CALL) { + decompiledShader += "}"; + } + + if (opcode == ShaderOpcodes::CALL && calledFunc->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + break; + } + + case ShaderOpcodes::LOOP: { + const u32 dest = getBits<10, 12>(instruction); + const u32 uniformIndex = getBits<22, 2>(instruction); + + // loop counter = uniform.y + decompiledShader += fmt::format("addr_reg.z = int((uniform_i[{}] >> 8u) & 0xFFu);\n", uniformIndex); + decompiledShader += fmt::format( + "for (uint loopCtr{} = 0u; loopCtr{} <= (uniform_i[{}] & 0xFFu); loopCtr{}++, addr_reg.z += int((uniform_i[{}] >> " + "16u) & 0xFFu)) {{\n", + pc, pc, uniformIndex, pc, uniformIndex + ); + + AddressRange range(pc + 1, dest + 1); + const Function* func = findFunction(range); + callFunction(*func); + decompiledShader += "}\n"; + + // Jump to the end of the loop. We don't want to compile the code inside the loop again. + // This will be incremented by 1 due to the pc++ at the end of this loop. + pc = dest; + + if (func->exitMode == ExitMode::AlwaysEnd) { + finished = true; + return; + } + break; + } + + case ShaderOpcodes::END: + decompiledShader += "return true;\n"; + finished = true; + return; + + case ShaderOpcodes::NOP: break; + + default: + Helpers::warn("GLSL recompiler: Unknown opcode: %02X. Falling back to CPU shaders", opcode); + compilationError = true; + break; + } + } + + pc++; +} + +bool ShaderDecompiler::usesCommonEncoding(u32 instruction) const { + const u32 opcode = instruction >> 26; + switch (opcode) { + case ShaderOpcodes::ADD: + case ShaderOpcodes::CMP1: + case ShaderOpcodes::CMP2: + case ShaderOpcodes::MUL: + case ShaderOpcodes::MIN: + case ShaderOpcodes::MAX: + case ShaderOpcodes::FLR: + case ShaderOpcodes::DP3: + case ShaderOpcodes::DP4: + case ShaderOpcodes::DPH: + case ShaderOpcodes::DPHI: + case ShaderOpcodes::LG2: + case ShaderOpcodes::EX2: + case ShaderOpcodes::RCP: + case ShaderOpcodes::RSQ: + case ShaderOpcodes::MOV: + case ShaderOpcodes::MOVA: + case ShaderOpcodes::SLT: + case ShaderOpcodes::SLTI: + case ShaderOpcodes::SGE: + case ShaderOpcodes::SGEI: + case ShaderOpcodes::LITP: return true; + + default: return false; + } +} + +void ShaderDecompiler::callFunction(const Function& function) { + switch (function.exitMode) { + // This function always ends, so call it and return true to signal that we're gonna be ending the shader + case ExitMode::AlwaysEnd: decompiledShader += function.getCallStatement() + ";\nreturn true;\n"; break; + // This function will potentially end. Call it, see if it returns that it ended, and return that we're ending if it did + case ExitMode::Conditional: decompiledShader += fmt::format("if ({}) {{ return true; }}\n", function.getCallStatement()); break; + // This function will not end. Just call it like a normal function. + default: decompiledShader += function.getCallStatement() + ";\n"; break; + } +} + +std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) { + ShaderDecompiler decompiler(shader, config, entrypoint, api, language); + + return decompiler.decompile(); +} + +const char* ShaderDecompiler::getCondition(u32 cond, u32 refX, u32 refY) { + static constexpr std::array conditions = { + // ref(Y, X) = (0, 0) + "!all(cmp_reg)", + "all(not(cmp_reg))", + "!cmp_reg.x", + "!cmp_reg.y", + + // ref(Y, X) = (0, 1) + "cmp_reg.x || !cmp_reg.y", + "cmp_reg.x && !cmp_reg.y", + "cmp_reg.x", + "!cmp_reg.y", + + // ref(Y, X) = (1, 0) + "!cmp_reg.x || cmp_reg.y", + "!cmp_reg.x && cmp_reg.y", + "!cmp_reg.x", + "cmp_reg.y", + + // ref(Y, X) = (1, 1) + "any(cmp_reg)", + "all(cmp_reg)", + "cmp_reg.x", + "cmp_reg.y", + }; + const u32 key = (cond & 0b11) | (refX << 2) | (refY << 3); + + return conditions[key]; +} diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp new file mode 100644 index 00000000..a9ad3a90 --- /dev/null +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -0,0 +1,839 @@ +#include + +#include + +#include "PICA/pica_frag_config.hpp" +#include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" + +// We can include the driver headers here since they shouldn't have any actual API-specific code +#include "renderer_gl/gl_driver.hpp" + +using namespace PICA; +using namespace PICA::ShaderGen; + +// Note: We upload global ambient and fog colour as u32 and decode on the GPU +// This shouldn't matter much for GPU performance, especially fog since it's relatively rare +static constexpr const char* uniformDefinition = R"( + struct LightSource { + vec3 specular0; + vec3 specular1; + vec3 diffuse; + vec3 ambient; + vec3 position; + vec3 spotlightDirection; + float distanceAttenuationBias; + float distanceAttenuationScale; + }; + + layout(std140) uniform FragmentUniforms { + int alphaReference; + float depthScale; + float depthOffset; + + vec4 constantColors[6]; + vec4 tevBufferColor; + vec4 clipCoords; + uint globalAmbientLight; + uint inFogColor; + LightSource lightSources[8]; + }; +)"; + +std::string FragmentGenerator::getDefaultVertexShader() { + std::string ret = ""; + // Reserve some space (128KB) in the output string to avoid too many allocations later + ret.reserve(128 * 1024); + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + + ret += uniformDefinition; + + ret += R"( + layout(location = 0) in vec4 a_coords; + layout(location = 1) in vec4 a_quaternion; + layout(location = 2) in vec4 a_vertexColour; + layout(location = 3) in vec2 a_texcoord0; + layout(location = 4) in vec2 a_texcoord1; + layout(location = 5) in float a_texcoord0_w; + layout(location = 6) in vec3 a_view; + layout(location = 7) in vec2 a_texcoord2; + + out vec4 v_quaternion; + out vec4 v_colour; + out vec3 v_texcoord0; + out vec2 v_texcoord1; + out vec3 v_view; + out vec2 v_texcoord2; + + #ifndef USING_GLES + out float gl_ClipDistance[2]; + #endif + + void main() { + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + v_quaternion = a_quaternion; + + #ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); + #endif + } +)"; + + return ret; +} + +std::string FragmentGenerator::generate(const FragmentConfig& config, void* driverInfo) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + // For GLES we need to enable & use the framebuffer fetch extension in order to emulate logic ops + bool emitLogicOps = api == API::GLES && config.outConfig.logicOpMode != PICA::LogicOpMode::Copy && driverInfo != nullptr; + + if (emitLogicOps) { + auto driver = static_cast(driverInfo); + + // If the driver does not support framebuffer fetch at all, don't emit logic op code + if (!driver->supportFbFetch()) { + emitLogicOps = false; + } + + // Figure out which fb fetch extension we have and enable it + else { + if (driver->supportsExtFbFetch) { + ret += "\n#extension GL_EXT_shader_framebuffer_fetch : enable\n#define fb_color fragColor\n"; + } else if (driver->supportsArmFbFetch) { + ret += "\n#extension GL_ARM_shader_framebuffer_fetch : enable\n#define fb_color gl_LastFragColorARM[0]\n"; + } + } + } + + bool unimplementedFlag = false; + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + #define fma(a, b, c) ((a) * (b) + (c)) + + precision mediump int; + precision mediump float; + )"; + } + + // Input and output attributes + ret += R"( + in vec4 v_quaternion; + in vec4 v_colour; + in vec3 v_texcoord0; + in vec2 v_texcoord1; + in vec3 v_view; + in vec2 v_texcoord2; + + out vec4 fragColor; + uniform sampler2D u_tex0; + uniform sampler2D u_tex1; + uniform sampler2D u_tex2; + uniform sampler2D u_tex_luts; + )"; + + ret += uniformDefinition; + + if (config.lighting.enable) { + ret += R"( + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; + } + + vec3 regToColor(uint reg) { + return (1.0 / 255.0) * vec3(float((reg >> 20u) & 0xFFu), float((reg >> 10u) & 0xFFu), float(reg & 0xFFu)); + } + )"; + } + + // Emit main function for fragment shader + // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour + ret += R"( + void main() { + vec4 combinerOutput = v_colour; + vec4 previousBuffer = vec4(0.0); + vec4 tevNextPreviousBuffer = tevBufferColor; + + vec4 primaryColor = vec4(0.0); + vec4 secondaryColor = vec4(0.0); + )"; + + compileLights(ret, config); + + ret += R"( + vec3 colorOp1 = vec3(0.0); + vec3 colorOp2 = vec3(0.0); + vec3 colorOp3 = vec3(0.0); + + float alphaOp1 = 0.0; + float alphaOp2 = 0.0; + float alphaOp3 = 0.0; + )"; + + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + ret += R"( + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * depthScale + depthOffset; + )"; + + if (!config.outConfig.depthMapEnable) { + ret += "depth /= gl_FragCoord.w;\n"; + } + + ret += "gl_FragDepth = depth;\n"; + + for (int i = 0; i < 6; i++) { + compileTEV(ret, i, config); + } + + compileFog(ret, config); + applyAlphaTest(ret, config); + + if (!emitLogicOps) { + ret += "fragColor = combinerOutput;\n}"; // End of main function + } else { + compileLogicOps(ret, config); + } + + return ret; +} + +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config) { + const u32* tevValues = config.texConfig.tevConfigs.data() + stage * 4; + + // Pass a 0 to constColor here, as it doesn't matter for compilation + TexEnvConfig tev(tevValues[0], tevValues[1], tevValues[2], 0, tevValues[3]); + + if (!tev.isPassthroughStage()) { + // Get color operands + shader += "colorOp1 = "; + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage, config); + + shader += ";\ncolorOp2 = "; + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage, config); + + shader += ";\ncolorOp3 = "; + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage, config); + + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; + getColorOperation(shader, tev.colorOp); + shader += ", vec3(0.0), vec3(1.0));\n"; + + if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { + // Dot3 RGBA also writes to the alpha component so we don't need to do anything more + shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n"; + } else { + // Get alpha operands + shader += "alphaOp1 = "; + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage, config); + + shader += ";\nalphaOp2 = "; + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage, config); + + shader += ";\nalphaOp3 = "; + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage, config); + + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; + getAlphaOperation(shader, tev.alphaOp); + // Clamp the alpha value to [0.0, 1.0] + shader += ", 0.0, 1.0);\n"; + } + + shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + + ".0, 0.0, 1.0));\n"; + } + + shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; + + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = config.texConfig.texEnvUpdateBuffer; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } + + // And whether to update alpha + if ((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; + } + } +} + +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config) { + using OperandType = TexEnvConfig::ColorOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || + color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) { + shader += "vec3(1.0, 1.0, 1.0) - "; + } + + switch (color) { + case OperandType::SourceColor: + case OperandType::OneMinusSourceColor: + getSource(shader, source, index, config); + shader += ".rgb"; + break; + + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index, config); + shader += ".rrr"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index, config); + shader += ".ggg"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index, config); + shader += ".bbb"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index, config); + shader += ".aaa"; + break; + + default: + shader += "vec3(1.0, 1.0, 1.0)"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index, const PICA::FragmentConfig& config) { + using OperandType = TexEnvConfig::AlphaOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue || + color == OperandType::OneMinusSourceAlpha) { + shader += "1.0 - "; + } + + switch (color) { + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index, config); + shader += ".r"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index, config); + shader += ".g"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index, config); + shader += ".b"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index, config); + shader += ".a"; + break; + + default: + shader += "1.0"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config) { + switch (source) { + case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; + case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; + case TexEnvConfig::Source::Texture2: { + // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 + if (Helpers::getBit<13>(config.texConfig.texUnitConfig)) { + shader += "texture(u_tex2, v_texcoord1)"; + } else { + shader += "texture(u_tex2, v_texcoord2)"; + } + break; + } + + case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; + case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; + case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; + + // Lighting + case TexEnvConfig::Source::PrimaryFragmentColor: shader += "primaryColor"; break; + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "secondaryColor"; break; + + default: + Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); + shader += "vec4(1.0, 1.0, 1.0, 1.0)"; + break; + } +} + +void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; + case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break; + case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented color op"); + shader += "vec3(1.0)"; + break; + } +} + +void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; + case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break; + case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented alpha op"); + shader += "1.0"; + break; + } +} + +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config) { + const CompareFunction function = config.outConfig.alphaTestFunction; + + // Alpha test disabled + if (function == CompareFunction::Always) { + return; + } + + shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n"; + shader += "if ("; + switch (function) { + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break; + case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break; + case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break; + case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break; + case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break; + case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break; + + default: + Helpers::warn("Unimplemented alpha test function"); + shader += "false"; + break; + } + + shader += ") { discard; }\n"; +} + +void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentConfig& config) { + if (!config.lighting.enable) { + return; + } + + // Currently ignore bump mode + shader += "vec3 normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion);\n"; + shader += R"( + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec3 light_position, light_vector, half_vector, specular0, specular1, reflected_color; + + float light_distance, NdotL, light_factor, geometric_factor, distance_attenuation, distance_att_delta; + float spotlight_attenuation, specular0_dist, specular1_dist; + float lut_lookup_result, lut_lookup_delta; + int lut_lookup_index; + )"; + + uint lightID = 0; + + for (int i = 0; i < config.lighting.lightNum; i++) { + lightID = config.lighting.lights[i].num; + + const auto& lightConfig = config.lighting.lights[i]; + shader += "light_position = lightSources[" + std::to_string(lightID) + "].position;\n"; + + if (lightConfig.directional) { // Directional lighting + shader += "light_vector = light_position;\n"; + } else { // Positional lighting + shader += "light_vector = light_position + v_view;\n"; + } + + shader += R"( + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); + + distance_attenuation = 1.0; + NdotL = dot(normal, light_vector); + )"; + + shader += lightConfig.twoSidedDiffuse ? "NdotL = abs(NdotL);\n" : "NdotL = max(NdotL, 0.0);\n"; + + if (lightConfig.geometricFactor0 || lightConfig.geometricFactor1) { + shader += R"( + geometric_factor = dot(half_vector, half_vector); + geometric_factor = (geometric_factor == 0.0) ? 0.0 : min(NdotL / geometric_factor, 1.0); + )"; + } + + if (lightConfig.distanceAttenuationEnable) { + shader += "distance_att_delta = clamp(light_distance * lightSources[" + std::to_string(lightID) + + "].distanceAttenuationScale + lightSources[" + std::to_string(lightID) + "].distanceAttenuationBias, 0.0, 1.0);\n"; + + shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) + + "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n"; + } + + compileLUTLookup(shader, config, i, spotlightLutIndex); + shader += "spotlight_attenuation = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D0); + shader += "specular0_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_D1); + shader += "specular1_dist = lut_lookup_result;\n"; + + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RR); + shader += "reflected_color.r = lut_lookup_result;\n"; + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RG)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RG); + shader += "reflected_color.g = lut_lookup_result;\n"; + } else { + shader += "reflected_color.g = reflected_color.r;\n"; + } + + if (isSamplerEnabled(config.lighting.config, PICA::Lights::LUT_RB)) { + compileLUTLookup(shader, config, i, PICA::Lights::LUT_RB); + shader += "reflected_color.b = lut_lookup_result;\n"; + } else { + shader += "reflected_color.b = reflected_color.r;\n"; + } + + shader += "specular0 = lightSources[" + std::to_string(lightID) + "].specular0 * specular0_dist;\n"; + if (lightConfig.geometricFactor0) { + shader += "specular0 *= geometric_factor;\n"; + } + + shader += "specular1 = lightSources[" + std::to_string(lightID) + "].specular1 * specular1_dist * reflected_color;\n"; + if (lightConfig.geometricFactor1) { + shader += "specular1 *= geometric_factor;\n"; + } + + shader += "light_factor = distance_attenuation * spotlight_attenuation;\n"; + + if (config.lighting.clampHighlights) { + shader += "specular_sum.rgb += light_factor * (NdotL == 0.0 ? 0.0 : 1.0) * (specular0 + specular1);\n"; + } else { + shader += "specular_sum.rgb += light_factor * (specular0 + specular1);\n"; + } + + shader += "diffuse_sum.rgb += light_factor * (lightSources[" + std::to_string(lightID) + "].ambient + lightSources[" + + std::to_string(lightID) + "].diffuse * NdotL);\n"; + } + + if (config.lighting.enablePrimaryAlpha || config.lighting.enableSecondaryAlpha) { + compileLUTLookup(shader, config, config.lighting.lightNum - 1, PICA::Lights::LUT_FR); + shader += "float fresnel_factor = lut_lookup_result;\n"; + } + + if (config.lighting.enablePrimaryAlpha) { + shader += "diffuse_sum.a = fresnel_factor;\n"; + } + + if (config.lighting.enableSecondaryAlpha) { + shader += "specular_sum.a = fresnel_factor;\n"; + } + + shader += R"( + vec4 global_ambient = vec4(regToColor(globalAmbientLight), 1.0); + + primaryColor = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondaryColor = clamp(specular_sum, vec4(0.0), vec4(1.0)); + )"; +} + +bool FragmentGenerator::isSamplerEnabled(u32 environmentID, u32 lutID) { + static constexpr bool samplerEnabled[9 * 7] = { + // D0 D1 SP FR RB RG RR + true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR + false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR + true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR + true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR + true, true, true, false, true, true, true, // Configuration 4: All except for FR + true, false, true, true, true, true, true, // Configuration 5: All except for D1 + true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG + false, false, false, false, false, false, false, // Configuration 7: Unused + true, true, true, true, true, true, true, // Configuration 8: All + }; + + return samplerEnabled[environmentID * 7 + lutID]; +} + +void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID) { + const LightingLUTConfig& lut = config.lighting.luts[lutID]; + uint lightID = config.lighting.lights[lightIndex].num; + uint lutIndex = 0; + bool lutEnabled = false; + + if (lutID == spotlightLutIndex) { + // These are the spotlight attenuation LUTs + lutIndex = 8u + lightID; + lutEnabled = config.lighting.lights[lightIndex].spotAttenuationEnable; + } else if (lutID <= 6) { + lutIndex = lutID; + lutEnabled = lut.enable; + } else { + Helpers::warn("Shadergen: Unimplemented LUT value"); + } + + const bool samplerEnabled = isSamplerEnabled(config.lighting.config, lutID); + + if (!samplerEnabled || !lutEnabled) { + shader += "lut_lookup_result = 1.0;\n"; + return; + } + + uint scale = lut.scale; + uint inputID = lut.type; + bool absEnabled = lut.absInput; + + switch (inputID) { + case 0: shader += "lut_lookup_delta = dot(normal, normalize(half_vector));\n"; break; + case 1: shader += "lut_lookup_delta = dot(normalize(v_view), normalize(half_vector));\n"; break; + case 2: shader += "lut_lookup_delta = dot(normal, normalize(v_view));\n"; break; + case 3: shader += "lut_lookup_delta = dot(normal, light_vector);\n"; break; + case 4: shader += "lut_lookup_delta = dot(light_vector, lightSources[" + std ::to_string(lightID) + "].spotlightDirection);\n"; break; + + default: + Helpers::warn("Shadergen: Unimplemented LUT select %d", inputID); + shader += "lut_lookup_delta = 1.0;\n"; + break; + } + + static constexpr float scales[] = {1.0f, 2.0f, 4.0f, 8.0f, 0.0f, 0.0f, 0.25f, 0.5f}; + + if (absEnabled) { + bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse; + shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; + } + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n"; + shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n"; + shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, lut_lookup_index);\n"; + if (scale != 0) { + shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n"; + } + } +} + +void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConfig& config) { + if (config.fogConfig.mode != FogMode::Fog) { + return; + } + + if (config.fogConfig.flipDepth) { + shader += "float fog_index = (1.0 - depth) * 128.0;\n"; + } else { + shader += "float fog_index = depth * 128.0;\n"; + } + + shader += "float clamped_index = clamp(floor(fog_index), 0.0, 127.0);"; + shader += "float delta = fog_index - clamped_index;"; + shader += "vec3 fog_color = (1.0 / 255.0) * vec3(float(inFogColor & 0xffu), float((inFogColor >> 8u) & 0xffu), float((inFogColor >> 16u) & 0xffu));"; + shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs + shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);"; + shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);"; +} + +std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader) { + // First, calculate output register -> Fixed function fragment semantics based on the VAO config + // This array contains the mappings for the 32 fixed function semantics (8 variables, with 4 lanes each). + // Each entry is a pair, containing the output reg to use for this semantic (first) and which lane of that register (second) + std::array, 32> outputMappings{}; + // Output registers adjusted according to VS_OUTPUT_MASK, which handles enabling and disabling output attributes + std::array vsOutputRegisters; + + { + uint count = 0; + u16 outputMask = vertConfig.outputMask; + + // See which registers are actually enabled and ignore the disabled ones + for (int i = 0; i < 16; i++) { + if (outputMask & 1) { + vsOutputRegisters[count++] = i; + } + + outputMask >>= 1; + } + + // For the others, map the index to a vs output directly (TODO: What does hw actually do?) + for (; count < 16; count++) { + vsOutputRegisters[count] = count; + } + + for (int i = 0; i < vertConfig.outputCount; i++) { + const u32 config = vertConfig.outmaps[i]; + for (int j = 0; j < 4; j++) { + const u32 mapping = (config >> (j * 8)) & 0x1F; + outputMappings[mapping] = std::make_pair(vsOutputRegisters[i], j); + } + } + } + + auto getSemanticName = [&](u32 semanticIndex) { + auto [reg, lane] = outputMappings[semanticIndex]; + return fmt::format("out_regs[{}][{}]", reg, lane); + }; + + std::string semantics = fmt::format( + R"( + vec4 a_coords = vec4({}, {}, {}, {}); + vec4 a_quaternion = vec4({}, {}, {}, {}); + vec4 a_vertexColour = vec4({}, {}, {}, {}); + vec2 a_texcoord0 = vec2({}, {}); + float a_texcoord0_w = {}; + vec2 a_texcoord1 = vec2({}, {}); + vec2 a_texcoord2 = vec2({}, {}); + vec3 a_view = vec3({}, {}, {}); +)", + getSemanticName(0), getSemanticName(1), getSemanticName(2), getSemanticName(3), getSemanticName(4), getSemanticName(5), getSemanticName(6), + getSemanticName(7), getSemanticName(8), getSemanticName(9), getSemanticName(10), getSemanticName(11), getSemanticName(12), + getSemanticName(13), getSemanticName(16), getSemanticName(14), getSemanticName(15), getSemanticName(22), getSemanticName(23), + getSemanticName(18), getSemanticName(19), getSemanticName(20) + ); + + if (usingUbershader) { + Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader"); + return picaSource; + } else { + // TODO: Uniforms and don't hardcode fixed-function semantic indices... + std::string ret = picaSource; + if (api == API::GLES) { + ret += "\n#define USING_GLES\n"; + } + + ret += uniformDefinition; + + ret += R"( +out vec4 v_quaternion; +out vec4 v_colour; +out vec3 v_texcoord0; +out vec2 v_texcoord1; +out vec3 v_view; +out vec2 v_texcoord2; + +#ifndef USING_GLES + out float gl_ClipDistance[2]; +#endif + +void main() { + pica_shader_main(); +)"; + // Transfer fixed function fragment registers from vertex shader output to the fragment shader + ret += semantics; + + ret += R"( + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + v_quaternion = a_quaternion; + +#ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); +#endif +})"; + return ret; + } +} + +void FragmentGenerator::compileLogicOps(std::string& shader, const PICA::FragmentConfig& config) { + if (api != API::GLES) [[unlikely]] { + Helpers::warn("Shadergen: Unsupported API for compileLogicOps"); + shader += "fragColor = combinerOutput;\n}"; // End of main function + + return; + } + + shader += "fragColor = "; + switch (config.outConfig.logicOpMode) { + case PICA::LogicOpMode::Copy: shader += "combinerOutput"; break; + case PICA::LogicOpMode::Nop: shader += "fb_color"; break; + case PICA::LogicOpMode::Clear: shader += "vec4(0.0)"; break; + case PICA::LogicOpMode::Set: shader += "vec4(1.0)"; break; + case PICA::LogicOpMode::InvertedCopy: shader += "vec4(uvec4(combinerOutput * 255.0) ^ uvec4(0xFFu)) * (1.0 / 255.0)"; break; + + default: + shader += "combinerOutput"; + Helpers::warn("Shadergen: Unimplemented logic op mode"); + break; + } + + shader += ";\n}"; // End of main function +} diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 003ef97a..a85c7464 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -74,6 +74,9 @@ void PICAShader::run() { break; } + // Undocumented, implementation based on 3DBrew and hw testing (see tests/PICA_LITP) + case ShaderOpcodes::LITP: [[unlikely]] litp(instruction); break; + default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } @@ -753,4 +756,33 @@ void PICAShader::jmpu(u32 instruction) { if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want pc = dest; +} + +void PICAShader::litp(u32 instruction) { + const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; + u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + src = getIndexedSource(src, idx); + vec4f srcVec = getSourceSwizzled<1>(src, operandDescriptor); + vec4f& destVector = getDest(dest); + + // Compare registers are set based on whether src.x and src.w are >= 0.0 + cmpRegister[0] = (srcVec[0].toFloat32() >= 0.0f); + cmpRegister[1] = (srcVec[3].toFloat32() >= 0.0f); + + vec4f result; + // TODO: Does max here have the same non-IEEE NaN behavior as the max instruction? + result[0] = f24::fromFloat32(std::max(srcVec[0].toFloat32(), 0.0f)); + result[1] = f24::fromFloat32(std::clamp(srcVec[1].toFloat32(), -127.9961f, 127.9961f)); + result[2] = f24::zero(); + result[3] = f24::fromFloat32(std::max(srcVec[3].toFloat32(), 0.0f)); + + u32 componentMask = operandDescriptor & 0xf; + for (int i = 0; i < 4; i++) { + if (componentMask & (1 << i)) { + destVector[3 - i] = result[3 - i]; + } + } } \ No newline at end of file diff --git a/src/core/PICA/shader_unit.cpp b/src/core/PICA/shader_unit.cpp index aa7b4c12..6b291d31 100644 --- a/src/core/PICA/shader_unit.cpp +++ b/src/core/PICA/shader_unit.cpp @@ -9,7 +9,6 @@ void ShaderUnit::reset() { void PICAShader::reset() { loadedShader.fill(0); - bufferedShader.fill(0); operandDescriptors.fill(0); boolUniform = 0; @@ -35,4 +34,5 @@ void PICAShader::reset() { codeHashDirty = true; opdescHashDirty = true; + uniformsDirty = true; } \ No newline at end of file diff --git a/src/core/audio/aac_decoder.cpp b/src/core/audio/aac_decoder.cpp new file mode 100644 index 00000000..281539d8 --- /dev/null +++ b/src/core/audio/aac_decoder.cpp @@ -0,0 +1,139 @@ +#include "audio/aac_decoder.hpp" + +#include + +#include +using namespace Audio; + +void AAC::Decoder::decode(AAC::Message& response, const AAC::Message& request, AAC::Decoder::PaddrCallback paddrCallback) { + // Copy the command and mode fields of the request to the response + response.command = request.command; + response.mode = request.mode; + response.decodeResponse.size = request.decodeRequest.size; + + // Write a dummy response at first. We'll be overwriting it later if decoding goes well + response.resultCode = AAC::ResultCode::Success; + response.decodeResponse.channelCount = 2; + response.decodeResponse.sampleCount = 1024; + response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; + + if (!isInitialized()) { + initialize(); + + // AAC decoder failed to initialize, return dummy data and return without decoding + if (!isInitialized()) { + Helpers::warn("Failed to initialize AAC decoder"); + return; + } + } + + u8* input = paddrCallback(request.decodeRequest.address); + const u8* inputEnd = paddrCallback(request.decodeRequest.address + request.decodeRequest.size); + u8* outputLeft = paddrCallback(request.decodeRequest.destAddrLeft); + u8* outputRight = nullptr; + + if (input == nullptr || inputEnd == nullptr || outputLeft == nullptr) { + Helpers::warn("Invalid pointers passed to AAC decoder"); + return; + } + + u32 bytesValid = request.decodeRequest.size; + u32 bufferSize = request.decodeRequest.size; + + // Each frame is 2048 samples with 2 channels + static constexpr usize frameSize = 2048 * 2; + std::array frame; + std::array, 2> audioStreams; + + bool queriedStreamInfo = false; + + while (bytesValid != 0) { + if (aacDecoder_Fill(decoderHandle, &input, &bufferSize, &bytesValid) != AAC_DEC_OK) { + Helpers::warn("Failed to fill AAC decoder with samples"); + return; + } + + auto decodeResult = aacDecoder_DecodeFrame(decoderHandle, frame.data(), frameSize, 0); + + if (decodeResult == AAC_DEC_TRANSPORT_SYNC_ERROR) { + // https://android.googlesource.com/platform/external/aac/+/2ddc922/libAACdec/include/aacdecoder_lib.h#362 + // According to the above, if we get a sync error, we're not meant to stop decoding, but rather just continue feeding data + } else if (decodeResult == AAC_DEC_OK) { + auto getSampleRate = [](u32 rate) { + switch (rate) { + case 8000: return AAC::SampleRate::Rate8000; + case 11025: return AAC::SampleRate::Rate11025; + case 12000: return AAC::SampleRate::Rate12000; + case 16000: return AAC::SampleRate::Rate16000; + case 22050: return AAC::SampleRate::Rate22050; + case 24000: return AAC::SampleRate::Rate24000; + case 32000: return AAC::SampleRate::Rate32000; + case 44100: return AAC::SampleRate::Rate44100; + case 48000: + default: return AAC::SampleRate::Rate48000; + } + }; + + auto info = aacDecoder_GetStreamInfo(decoderHandle); + response.decodeResponse.sampleCount = info->frameSize; + response.decodeResponse.channelCount = info->numChannels; + response.decodeResponse.sampleRate = getSampleRate(info->sampleRate); + + int channels = info->numChannels; + // Reserve space in our output stream vectors so push_back doesn't do allocations + for (int i = 0; i < channels; i++) { + audioStreams[i].reserve(audioStreams[i].size() + info->frameSize); + } + + // Fetch output pointer for right output channel if we've got > 1 channel + if (channels > 1 && outputRight == nullptr) { + outputRight = paddrCallback(request.decodeRequest.destAddrRight); + // If the right output channel doesn't point to a proper padddr, return + if (outputRight == nullptr) { + Helpers::warn("Right AAC output channel doesn't point to valid physical address"); + return; + } + } + + for (int sample = 0; sample < info->frameSize; sample++) { + for (int stream = 0; stream < channels; stream++) { + audioStreams[stream].push_back(frame[(sample * channels) + stream]); + } + } + } else { + Helpers::warn("Failed to decode AAC frame"); + return; + } + } + + for (int i = 0; i < 2; i++) { + auto& stream = audioStreams[i]; + u8* pointer = (i == 0) ? outputLeft : outputRight; + + if (!stream.empty() && pointer != nullptr) { + std::memcpy(pointer, stream.data(), stream.size() * sizeof(s16)); + } + } +} + +void AAC::Decoder::initialize() { + decoderHandle = aacDecoder_Open(TRANSPORT_TYPE::TT_MP4_ADTS, 1); + + if (decoderHandle == nullptr) [[unlikely]] { + return; + } + + // Cap output channel count to 2 + if (aacDecoder_SetParam(decoderHandle, AAC_PCM_MAX_OUTPUT_CHANNELS, 2) != AAC_DEC_OK) [[unlikely]] { + aacDecoder_Close(decoderHandle); + decoderHandle = nullptr; + return; + } +} + +AAC::Decoder::~Decoder() { + if (isInitialized()) { + aacDecoder_Close(decoderHandle); + decoderHandle = nullptr; + } +} \ No newline at end of file diff --git a/src/core/audio/hle_core.cpp b/src/core/audio/hle_core.cpp index 4ee5a1dc..85eee97a 100644 --- a/src/core/audio/hle_core.cpp +++ b/src/core/audio/hle_core.cpp @@ -2,9 +2,11 @@ #include #include +#include #include #include +#include "audio/aac_decoder.hpp" #include "services/dsp.hpp" namespace Audio { @@ -22,6 +24,8 @@ namespace Audio { for (int i = 0; i < sources.size(); i++) { sources[i].index = i; } + + aacDecoder.reset(new Audio::AAC::Decoder()); } void HLE_DSP::resetAudioPipe() { @@ -64,10 +68,6 @@ namespace Audio { dspState = DSPState::Off; loaded = false; - // Initialize these to some sane defaults - sampleFormat = SampleFormat::ADPCM; - sourceType = SourceType::Stereo; - for (auto& e : pipeData) { e.clear(); } @@ -76,6 +76,7 @@ namespace Audio { source.reset(); } + mixer.reset(); // Note: Reset audio pipe AFTER resetting all pipes, otherwise the new data will be yeeted resetAudioPipe(); } @@ -98,14 +99,18 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void HLE_DSP::runAudioFrame() { + void HLE_DSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); } + // TODO: Should this be called if dspState != DSPState::On? outputFrame(); - scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame); + + // How many cycles we were late + const u64 cycleDrift = scheduler.currentTimestamp - eventTimestamp; + scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::cyclesPerFrame - cycleDrift); } u16 HLE_DSP::recvData(u32 regId) { @@ -113,7 +118,7 @@ namespace Audio { Helpers::panic("Audio: invalid register in HLE frontend"); } - return dspState == DSPState::On; + return dspState != DSPState::On; } void HLE_DSP::writeProcessPipe(u32 channel, u32 size, u32 buffer) { @@ -152,12 +157,26 @@ namespace Audio { break; } - case DSPPipeType::Binary: - Helpers::warn("Unimplemented write to binary pipe! Size: %d\n", size); + case DSPPipeType::Binary: { + log("Unimplemented write to binary pipe! Size: %d\n", size); + + AAC::Message request; + if (size == sizeof(request)) { + std::array raw; + for (uint i = 0; i < size; i++) { + raw[i] = mem.read32(buffer + i); + } + + std::memcpy(&request, raw.data(), sizeof(request)); + handleAACRequest(request); + } else { + Helpers::warn("Invalid size for AAC request"); + } // This pipe and interrupt are normally used for requests like AAC decode dspService.triggerPipeEvent(DSPPipeType::Binary); break; + } default: log("Audio::HLE_DSP: Wrote to unimplemented pipe %d\n", channel); break; } @@ -205,6 +224,11 @@ namespace Audio { SharedMemory& read = readRegion(); SharedMemory& write = writeRegion(); + // TODO: Properly implement mixers + // The DSP checks the DSP configuration dirty bits on every frame, applies them, and clears them + read.dspConfiguration.dirtyRaw = 0; + read.dspConfiguration.dirtyRaw2 = 0; + for (int i = 0; i < sourceCount; i++) { // Update source configuration from the read region of shared memory auto& config = read.sourceConfigurations.config[i]; @@ -212,20 +236,23 @@ namespace Audio { updateSourceConfig(source, config, read.adpcmCoefficients.coeff[i]); // Generate audio - if (source.enabled && !source.buffers.empty()) { - const auto& buffer = source.buffers.top(); - const u8* data = getPointerPhys(buffer.paddr); - - if (data != nullptr) { - // TODO - } + if (source.enabled) { + generateFrame(source); } // Update write region of shared memory auto& status = write.sourceStatuses.status[i]; - status.isEnabled = source.enabled; + status.enabled = source.enabled; status.syncCount = source.syncCount; + status.currentBufferIDDirty = (source.isBufferIDDirty ? 1 : 0); + status.currentBufferID = source.currentBufferID; + status.previousBufferID = source.previousBufferID; + status.samplePosition = source.samplePosition; + + source.isBufferIDDirty = false; } + + performMix(read, write); } void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) { @@ -234,6 +261,17 @@ namespace Audio { return; } + // The reset flags take priority, as you can reset a source and set it up to be played again at the same time + if (config.resetFlag) { + config.resetFlag = 0; + source.reset(); + } + + if (config.partialResetFlag) { + config.partialResetFlag = 0; + source.buffers = {}; + } + if (config.enableDirty) { config.enableDirty = 0; source.enabled = config.enable != 0; @@ -253,26 +291,23 @@ namespace Audio { ); } - if (config.resetFlag) { - config.resetFlag = 0; - source.reset(); - } - - if (config.partialResetFlag) { - config.partialResetFlag = 0; - source.buffers = {}; - } - // TODO: Should we check bufferQueueDirty here too? if (config.formatDirty || config.embeddedBufferDirty) { - sampleFormat = config.format; + source.sampleFormat = config.format; } if (config.monoOrStereoDirty || config.embeddedBufferDirty) { - sourceType = config.monoOrStereo; + source.sourceType = config.monoOrStereo; + } + + if (config.rateMultiplierDirty) { + source.rateMultiplier = (config.rateMultiplier > 0.f) ? config.rateMultiplier : 1.f; } if (config.embeddedBufferDirty) { + // Annoyingly, and only for embedded buffer, whether we use config.playPosition depends on the relevant dirty bit + const u32 playPosition = config.playPositionDirty ? config.playPosition : 0; + config.embeddedBufferDirty = 0; if (s32(config.length) >= 0) [[likely]] { // TODO: Add sample format and channel count @@ -284,9 +319,9 @@ namespace Audio { .adpcmDirty = config.adpcmDirty != 0, .looping = config.isLooping != 0, .bufferID = config.bufferID, - .playPosition = config.playPosition, - .format = sampleFormat, - .sourceType = sourceType, + .playPosition = playPosition, + .format = source.sampleFormat, + .sourceType = source.sourceType, .fromQueue = false, .hasPlayedOnce = false, }; @@ -303,8 +338,40 @@ namespace Audio { } if (config.bufferQueueDirty) { + // printf("Buffer queue dirty for voice %d\n", source.index); + + u16 dirtyBuffers = config.buffersDirty; config.bufferQueueDirty = 0; - printf("Buffer queue dirty for voice %d\n", source.index); + config.buffersDirty = 0; + + for (int i = 0; i < 4; i++) { + bool dirty = ((dirtyBuffers >> i) & 1) != 0; + if (dirty) { + const auto& buffer = config.buffers[i]; + + if (s32(buffer.length) >= 0) [[likely]] { + // TODO: Add sample format and channel count + Source::Buffer newBuffer{ + .paddr = buffer.physicalAddress, + .sampleCount = buffer.length, + .adpcmScale = u8(buffer.adpcm_ps), + .previousSamples = {s16(buffer.adpcm_yn[0]), s16(buffer.adpcm_yn[1])}, + .adpcmDirty = buffer.adpcmDirty != 0, + .looping = buffer.isLooping != 0, + .bufferID = buffer.bufferID, + .playPosition = 0, + .format = source.sampleFormat, + .sourceType = source.sourceType, + .fromQueue = true, + .hasPlayedOnce = false, + }; + + source.buffers.emplace(std::move(newBuffer)); + } else { + printf("Buffer queue dirty: Invalid buffer size for DSP voice %d\n", source.index); + } + } + } } config.dirtyRaw = 0; @@ -327,13 +394,163 @@ namespace Audio { return; } - switch (buffer.format) { - case SampleFormat::PCM8: - case SampleFormat::PCM16: Helpers::warn("Unimplemented sample format!"); break; + source.currentBufferID = buffer.bufferID; + source.previousBufferID = 0; + // For looping buffers, this is only set for the first time we play it. Loops do not set the dirty bit. + source.isBufferIDDirty = !buffer.hasPlayedOnce && buffer.fromQueue; - case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; - default: Helpers::warn("Invalid DSP sample format"); break; + if (buffer.hasPlayedOnce) { + source.samplePosition = 0; + } else { + // Mark that the buffer has already been played once, needed for looping buffers + buffer.hasPlayedOnce = true; + // Play position is only used for the initial time the buffer is played. Loops will start from the beginning of the buffer. + source.samplePosition = buffer.playPosition; } + + switch (buffer.format) { + case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, buffer.sampleCount, source); break; + case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, buffer.sampleCount, source); break; + case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break; + + default: + Helpers::warn("Invalid DSP sample format"); + source.currentSamples = {}; + break; + } + + // If the buffer is a looping buffer, re-push it + if (buffer.looping) { + source.pushBuffer(buffer); + } + + // We're skipping the first samplePosition samples, so remove them from the buffer so as not to consume them later + if (source.samplePosition > 0) { + auto start = source.currentSamples.begin(); + auto end = std::next(start, source.samplePosition); + source.currentSamples.erase(start, end); + } + } + + void HLE_DSP::generateFrame(DSPSource& source) { + if (source.currentSamples.empty()) { + // There's no audio left to play, turn the voice off + if (source.buffers.empty()) { + source.enabled = false; + source.isBufferIDDirty = true; + source.previousBufferID = source.currentBufferID; + source.currentBufferID = 0; + + return; + } + + decodeBuffer(source); + } else { + uint maxSampleCount = uint(float(Audio::samplesInFrame) * source.rateMultiplier); + uint outputCount = 0; + + while (outputCount < maxSampleCount) { + if (source.currentSamples.empty()) { + if (source.buffers.empty()) { + break; + } else { + decodeBuffer(source); + } + } + + const uint sampleCount = std::min(maxSampleCount - outputCount, source.currentSamples.size()); + + // samples.insert(samples.end(), source.currentSamples.begin(), source.currentSamples.begin() + sampleCount); + source.currentSamples.erase(source.currentSamples.begin(), std::next(source.currentSamples.begin(), sampleCount)); + source.samplePosition += sampleCount; + outputCount += sampleCount; + } + } + } + + void HLE_DSP::performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion) { + updateMixerConfig(readRegion); + // TODO: Do the actual audio mixing + + auto& dspStatus = writeRegion.dspStatus; + // Stub the DSP status. It's unknown what the "unknown" field is but Citra sets it to 0, so we do too to be safe + dspStatus.droppedFrames = 0; + dspStatus.unknown = 0; + } + + void HLE_DSP::updateMixerConfig(Audio::HLE::SharedMemory& sharedMem) { + auto& config = sharedMem.dspConfiguration; + // No configs have been changed, so there's nothing to update + if (config.dirtyRaw == 0) { + return; + } + + if (config.outputFormatDirty) { + mixer.channelFormat = config.outputFormat; + } + + if (config.masterVolumeDirty) { + mixer.volumes[0] = config.masterVolume; + } + + if (config.auxVolume0Dirty) { + mixer.volumes[1] = config.auxVolumes[0]; + } + + if (config.auxVolume1Dirty) { + mixer.volumes[2] = config.auxVolumes[1]; + } + + if (config.auxBusEnable0Dirty) { + mixer.enableAuxStages[0] = config.auxBusEnable[0] != 0; + } + + if (config.auxBusEnable1Dirty) { + mixer.enableAuxStages[1] = config.auxBusEnable[1] != 0; + } + + config.dirtyRaw = 0; + } + + HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) { + SampleBuffer decodedSamples(sampleCount); + + if (source.sourceType == SourceType::Stereo) { + for (usize i = 0; i < sampleCount; i++) { + const s16 left = s16(u16(*data++) << 8); + const s16 right = s16(u16(*data++) << 8); + decodedSamples[i] = {left, right}; + } + } else { + // Mono + for (usize i = 0; i < sampleCount; i++) { + const s16 sample = s16(u16(*data++) << 8); + decodedSamples[i] = {sample, sample}; + } + } + + return decodedSamples; + } + + HLE_DSP::SampleBuffer HLE_DSP::decodePCM16(const u8* data, usize sampleCount, Source& source) { + SampleBuffer decodedSamples(sampleCount); + const s16* data16 = reinterpret_cast(data); + + if (source.sourceType == SourceType::Stereo) { + for (usize i = 0; i < sampleCount; i++) { + const s16 left = *data16++; + const s16 right = *data16++; + decodedSamples[i] = {left, right}; + } + } else { + // Mono + for (usize i = 0; i < sampleCount; i++) { + const s16 sample = *data16++; + decodedSamples[i] = {sample, sample}; + } + } + + return decodedSamples; } HLE_DSP::SampleBuffer HLE_DSP::decodeADPCM(const u8* data, usize sampleCount, Source& source) { @@ -411,10 +628,61 @@ namespace Audio { return decodedSamples; } + void HLE_DSP::handleAACRequest(const AAC::Message& request) { + AAC::Message response; + + switch (request.command) { + case AAC::Command::EncodeDecode: { + // Dummy response to stop games from hanging + response.resultCode = AAC::ResultCode::Success; + response.decodeResponse.channelCount = 2; + response.decodeResponse.sampleCount = 1024; + response.decodeResponse.size = 0; + response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000; + + response.command = request.command; + response.mode = request.mode; + + // TODO: Make this a toggle in config.toml. Currently we have it on by default. + constexpr bool enableAAC = true; + if (enableAAC) { + aacDecoder->decode(response, request, [this](u32 paddr) { return getPointerPhys(paddr); }); + } + break; + } + + case AAC::Command::Init: + case AAC::Command::Shutdown: + case AAC::Command::LoadState: + case AAC::Command::SaveState: + response = request; + response.resultCode = AAC::ResultCode::Success; + break; + + default: Helpers::warn("Unknown AAC command type"); break; + } + + // Copy response data to the binary pipe + auto& pipe = pipeData[DSPPipeType::Binary]; + pipe.resize(sizeof(response)); + std::memcpy(&pipe[0], &response, sizeof(response)); + } + void DSPSource::reset() { enabled = false; + isBufferIDDirty = false; + + // Initialize these to some sane defaults + sampleFormat = SampleFormat::ADPCM; + sourceType = SourceType::Stereo; + + samplePosition = 0; + previousBufferID = 0; + currentBufferID = 0; syncCount = 0; + rateMultiplier = 1.f; buffers = {}; + currentSamples.clear(); } } // namespace Audio diff --git a/src/core/audio/miniaudio_device.cpp b/src/core/audio/miniaudio_device.cpp index fa36cb84..dd5cfa85 100644 --- a/src/core/audio/miniaudio_device.cpp +++ b/src/core/audio/miniaudio_device.cpp @@ -90,16 +90,17 @@ void MiniAudioDevice::init(Samples& samples, bool safe) { deviceConfig.dataCallback = [](ma_device* device, void* out, const void* input, ma_uint32 frameCount) { auto self = reinterpret_cast(device->pUserData); s16* output = reinterpret_cast(out); + const usize maxSamples = std::min(self->samples->Capacity(), usize(frameCount * channelCount)); // Wait until there's enough samples to pop - while (self->samples->size() < frameCount * channelCount) { + while (self->samples->size() < maxSamples) { // If audio output is disabled from the emulator thread, make sure that this callback will return and not hang if (!self->running) { return; } } - self->samples->pop(output, frameCount * channelCount); + self->samples->pop(output, maxSamples); }; if (ma_device_init(&context, &deviceConfig, &device) != MA_SUCCESS) { diff --git a/src/core/audio/null_core.cpp b/src/core/audio/null_core.cpp index ec073ae7..93c746cb 100644 --- a/src/core/audio/null_core.cpp +++ b/src/core/audio/null_core.cpp @@ -74,7 +74,7 @@ namespace Audio { scheduler.removeEvent(Scheduler::EventType::RunDSP); } - void NullDSP::runAudioFrame() { + void NullDSP::runAudioFrame(u64 eventTimestamp) { // Signal audio pipe when an audio frame is done if (dspState == DSPState::On) [[likely]] { dspService.triggerPipeEvent(DSPPipeType::Audio); diff --git a/src/core/crypto/aes_engine.cpp b/src/core/crypto/aes_engine.cpp index f4bf3494..dc3ae060 100644 --- a/src/core/crypto/aes_engine.cpp +++ b/src/core/crypto/aes_engine.cpp @@ -1,13 +1,15 @@ -#include -#include - #include "crypto/aes_engine.hpp" + +#include +#include +#include + #include "helpers.hpp" namespace Crypto { void AESEngine::loadKeys(const std::filesystem::path& path) { std::ifstream file(path, std::ios::in); - + if (file.fail()) { Helpers::warn("Keys: Couldn't read key file: %s", path.c_str()); return; @@ -58,18 +60,10 @@ namespace Crypto { } switch (keyType) { - case 'X': - setKeyX(slotId, key.value()); - break; - case 'Y': - setKeyY(slotId, key.value()); - break; - case 'N': - setNormalKey(slotId, key.value()); - break; - default: - Helpers::warn("Keys: Invalid key type %c", keyType); - break; + case 'X': setKeyX(slotId, key.value()); break; + case 'Y': setKeyY(slotId, key.value()); break; + case 'N': setNormalKey(slotId, key.value()); break; + default: Helpers::warn("Keys: Invalid key type %c", keyType); break; } } @@ -80,4 +74,65 @@ namespace Crypto { keysLoaded = true; } -}; \ No newline at end of file + + void AESEngine::setSeedPath(const std::filesystem::path& path) { seedDatabase.open(path, "rb"); } + + // Loads seeds from a seed file, return true on success and false on failure + bool AESEngine::loadSeeds() { + if (!seedDatabase.isOpen()) { + return false; + } + + // The # of seeds is stored at offset 0 + u32_le seedCount = 0; + + if (!seedDatabase.rewind()) { + return false; + } + + auto [success, size] = seedDatabase.readBytes(&seedCount, sizeof(u32)); + if (!success || size != sizeof(u32)) { + return false; + } + + // Key data starts from offset 16 + if (!seedDatabase.seek(16)) { + return false; + } + + Crypto::Seed seed; + for (uint i = 0; i < seedCount; i++) { + std::tie(success, size) = seedDatabase.readBytes(&seed, sizeof(seed)); + if (!success || size != sizeof(seed)) { + return false; + } + + seeds.push_back(seed); + } + + return true; + } + + std::optional AESEngine::getSeedFromDB(u64 titleID) { + // We don't have a seed db nor any seeds loaded, return nullopt + if (!seedDatabase.isOpen() && seeds.empty()) { + return std::nullopt; + } + + // We have a seed DB but haven't loaded the seeds yet, so load them + if (seedDatabase.isOpen() && seeds.empty()) { + bool success = loadSeeds(); + if (!success) { + return std::nullopt; + } + } + + for (Crypto::Seed& seed : seeds) { + if (seed.titleID == titleID) { + return seed.seed; + } + } + + return std::nullopt; + } +}; // namespace Crypto \ No newline at end of file diff --git a/src/core/fs/archive_sdmc.cpp b/src/core/fs/archive_sdmc.cpp index 6c34de7a..97b02b9e 100644 --- a/src/core/fs/archive_sdmc.cpp +++ b/src/core/fs/archive_sdmc.cpp @@ -39,7 +39,35 @@ HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) { } HorizonResult SDMCArchive::deleteFile(const FSPath& path) { - Helpers::panic("[SDMC] Unimplemented DeleteFile"); + if (path.type == PathType::UTF16) { + if (!isPathSafe(path)) { + Helpers::panic("Unsafe path in SDMC::DeleteFile"); + } + + fs::path p = IOFile::getAppData() / "SDMC"; + p += fs::path(path.utf16_string).make_preferred(); + + if (fs::is_directory(p)) { + Helpers::panic("SDMC::DeleteFile: Tried to delete directory"); + } + + if (!fs::is_regular_file(p)) { + return Result::FS::FileNotFoundAlt; + } + + std::error_code ec; + bool success = fs::remove(p, ec); + + // It might still be possible for fs::remove to fail, if there's eg an open handle to a file being deleted + // In this case, print a warning, but still return success for now + if (!success) { + Helpers::warn("SDMC::DeleteFile: fs::remove failed\n"); + } + + return Result::Success; + } + + Helpers::panic("SDMCArchive::DeleteFile: Unknown path type"); return Result::Success; } @@ -145,7 +173,7 @@ Rust::Result SDMCArchive::openDirectory(const F if (path.type == PathType::UTF16) { if (!isPathSafe(path)) { - Helpers::panic("Unsafe path in SaveData::OpenDirectory"); + Helpers::panic("Unsafe path in SDMC::OpenDirectory"); } fs::path p = IOFile::getAppData() / "SDMC"; diff --git a/src/core/kernel/address_arbiter.cpp b/src/core/kernel/address_arbiter.cpp index 8c07b423..d15c81b8 100644 --- a/src/core/kernel/address_arbiter.cpp +++ b/src/core/kernel/address_arbiter.cpp @@ -12,7 +12,7 @@ static const char* arbitrationTypeToString(u32 type) { } } -Handle Kernel::makeArbiter() { +HorizonHandle Kernel::makeArbiter() { if (arbiterCount >= appResourceLimits.maxAddressArbiters) { Helpers::panic("Overflowed the number of address arbiters"); } diff --git a/src/core/kernel/events.cpp b/src/core/kernel/events.cpp index 7c0d3047..6d3dfbd7 100644 --- a/src/core/kernel/events.cpp +++ b/src/core/kernel/events.cpp @@ -12,7 +12,7 @@ const char* Kernel::resetTypeToString(u32 type) { } } -Handle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { +HorizonHandle Kernel::makeEvent(ResetType resetType, Event::CallbackType callback) { Handle ret = makeObject(KernelObjectType::Event); objects[ret].data = new Event(resetType, callback); return ret; diff --git a/src/core/kernel/file_operations.cpp b/src/core/kernel/file_operations.cpp index 972190fa..2b2020d1 100644 --- a/src/core/kernel/file_operations.cpp +++ b/src/core/kernel/file_operations.cpp @@ -184,7 +184,8 @@ void Kernel::setFileSize(u32 messagePointer, Handle fileHandle) { if (success) { mem.write32(messagePointer + 4, Result::Success); } else { - Helpers::panic("FileOp::SetFileSize failed"); + Helpers::warn("FileOp::SetFileSize failed"); + mem.write32(messagePointer + 4, Result::FailurePlaceholder); } } else { Helpers::panic("Tried to set file size of file without file descriptor"); diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp index 03e8a4b9..a404ec9c 100644 --- a/src/core/kernel/kernel.cpp +++ b/src/core/kernel/kernel.cpp @@ -83,7 +83,7 @@ void Kernel::setVersion(u8 major, u8 minor) { mem.kernelVersion = descriptor; // The memory objects needs a copy because you can read the kernel ver from config mem } -Handle Kernel::makeProcess(u32 id) { +HorizonHandle Kernel::makeProcess(u32 id) { const Handle processHandle = makeObject(KernelObjectType::Process); const Handle resourceLimitHandle = makeObject(KernelObjectType::ResourceLimit); @@ -401,3 +401,5 @@ std::string Kernel::getProcessName(u32 pid) { Helpers::panic("Attempted to name non-current process"); } } + +Scheduler& Kernel::getScheduler() { return cpu.getScheduler(); } diff --git a/src/core/kernel/memory_management.cpp b/src/core/kernel/memory_management.cpp index 24943c3a..901ee1cd 100644 --- a/src/core/kernel/memory_management.cpp +++ b/src/core/kernel/memory_management.cpp @@ -163,7 +163,7 @@ void Kernel::mapMemoryBlock() { break; case KernelHandles::FontSharedMemHandle: - mem.copySharedFont(ptr); + mem.copySharedFont(ptr, addr); break; case KernelHandles::CSNDSharedMemHandle: @@ -181,7 +181,7 @@ void Kernel::mapMemoryBlock() { regs[0] = Result::Success; } -Handle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { +HorizonHandle Kernel::makeMemoryBlock(u32 addr, u32 size, u32 myPermission, u32 otherPermission) { Handle ret = makeObject(KernelObjectType::MemoryBlock); objects[ret].data = new MemoryBlock(addr, size, myPermission, otherPermission); diff --git a/src/core/kernel/ports.cpp b/src/core/kernel/ports.cpp index 6038de44..61ab26e3 100644 --- a/src/core/kernel/ports.cpp +++ b/src/core/kernel/ports.cpp @@ -1,7 +1,7 @@ #include "kernel.hpp" #include -Handle Kernel::makePort(const char* name) { +HorizonHandle Kernel::makePort(const char* name) { Handle ret = makeObject(KernelObjectType::Port); portHandles.push_back(ret); // Push the port handle to our cache of port handles objects[ret].data = new Port(name); @@ -9,7 +9,7 @@ Handle Kernel::makePort(const char* name) { return ret; } -Handle Kernel::makeSession(Handle portHandle) { +HorizonHandle Kernel::makeSession(Handle portHandle) { const auto port = getObject(portHandle, KernelObjectType::Port); if (port == nullptr) [[unlikely]] { Helpers::panic("Trying to make session for non-existent port"); @@ -23,7 +23,7 @@ Handle Kernel::makeSession(Handle portHandle) { // Get the handle of a port based on its name // If there's no such port, return nullopt -std::optional Kernel::getPortHandle(const char* name) { +std::optional Kernel::getPortHandle(const char* name) { for (auto handle : portHandles) { const auto data = objects[handle].getData(); if (std::strncmp(name, data->name, Port::maxNameLen) == 0) { diff --git a/src/core/kernel/threads.cpp b/src/core/kernel/threads.cpp index 3a6201c1..9eb7a197 100644 --- a/src/core/kernel/threads.cpp +++ b/src/core/kernel/threads.cpp @@ -109,7 +109,7 @@ void Kernel::rescheduleThreads() { } // Internal OS function to spawn a thread -Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { +HorizonHandle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, ProcessorID id, u32 arg, ThreadStatus status) { int index; // Index of the created thread in the threads array if (threadCount < appResourceLimits.maxThreads) [[likely]] { // If we have not yet created over too many threads @@ -161,7 +161,7 @@ Handle Kernel::makeThread(u32 entrypoint, u32 initialSP, u32 priority, Processor return ret; } -Handle Kernel::makeMutex(bool locked) { +HorizonHandle Kernel::makeMutex(bool locked) { Handle ret = makeObject(KernelObjectType::Mutex); objects[ret].data = new Mutex(locked, ret); @@ -201,7 +201,7 @@ void Kernel::releaseMutex(Mutex* moo) { } } -Handle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { +HorizonHandle Kernel::makeSemaphore(u32 initialCount, u32 maximumCount) { Handle ret = makeObject(KernelObjectType::Semaphore); objects[ret].data = new Semaphore(initialCount, maximumCount); diff --git a/src/core/kernel/timers.cpp b/src/core/kernel/timers.cpp index 35fc57a4..8cfa4773 100644 --- a/src/core/kernel/timers.cpp +++ b/src/core/kernel/timers.cpp @@ -4,7 +4,7 @@ #include "kernel.hpp" #include "scheduler.hpp" -Handle Kernel::makeTimer(ResetType type) { +HorizonHandle Kernel::makeTimer(ResetType type) { Handle ret = makeObject(KernelObjectType::Timer); objects[ret].data = new Timer(type); diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 3bf73e5d..96d13813 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -1,12 +1,15 @@ +#include "loader/ncch.hpp" + #include #include -#include -#include -#include "loader/lz77.hpp" -#include "loader/ncch.hpp" -#include "memory.hpp" +#include +#include #include +#include + +#include "loader/lz77.hpp" +#include "memory.hpp" bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSInfo &info) { // 0x200 bytes for the NCCH header @@ -25,10 +28,12 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } codeFile.clear(); - saveData.clear(); smdh.clear(); partitionInfo = info; + primaryKey = {}; + secondaryKey = {}; + size = u64(*(u32*)&header[0x104]) * mediaUnit; // TODO: Maybe don't type pun because big endian will break exheaderSize = *(u32*)&header[0x180]; @@ -68,8 +73,26 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!seedCrypto) { secondaryKeyY = primaryKeyY; } else { - Helpers::warn("Seed crypto is not supported"); - gotCryptoKeys = false; + // In seed crypto mode, the secondary key is computed through a SHA256 hash of the primary key and a title-specific seed, which we fetch + // from seeddb.bin + std::optional seedOptional = aesEngine.getSeedFromDB(programID); + if (seedOptional.has_value()) { + auto seed = *seedOptional; + + CryptoPP::SHA256 shaEngine; + std::array data; + std::array hash; + + std::memcpy(&data[0], primaryKeyY.data(), primaryKeyY.size()); + std::memcpy(&data[16], seed.data(), seed.size()); + shaEngine.CalculateDigest(hash.data(), data.data(), data.size()); + // Note that SHA256 will produce a 256-bit hash, while we only need 128 bits cause this is an AES key + // So the latter 16 bytes of the SHA256 are thrown out. + std::memcpy(secondaryKeyY.data(), hash.data(), secondaryKeyY.size()); + } else { + Helpers::warn("Couldn't find a seed value for this title. Make sure you have a seeddb.bin file alongside your aes_keys.txt"); + gotCryptoKeys = false; + } } auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY); @@ -78,15 +101,15 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn if (!primaryResult.first || !secondaryResult.first) { gotCryptoKeys = false; } else { - Crypto::AESKey primaryKey = primaryResult.second; - Crypto::AESKey secondaryKey = secondaryResult.second; + primaryKey = primaryResult.second; + secondaryKey = secondaryResult.second; EncryptionInfo encryptionInfoTmp; - encryptionInfoTmp.normalKey = primaryKey; + encryptionInfoTmp.normalKey = *primaryKey; encryptionInfoTmp.initialCounter.fill(0); - for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) { - encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i]; + for (usize i = 0; i < 8; i++) { + encryptionInfoTmp.initialCounter[i] = header[0x108 + 7 - i]; } encryptionInfoTmp.initialCounter[8] = 1; exheaderInfo.encryptionInfo = encryptionInfoTmp; @@ -94,7 +117,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn encryptionInfoTmp.initialCounter[8] = 2; exeFS.encryptionInfo = encryptionInfoTmp; - encryptionInfoTmp.normalKey = secondaryKey; + encryptionInfoTmp.normalKey = *secondaryKey; encryptionInfoTmp.initialCounter[8] = 3; romFS.encryptionInfo = encryptionInfoTmp; } @@ -152,8 +175,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } - const u64 saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes - saveData.resize(saveDataSize, 0xff); + saveDataSize = *(u64*)&exheader[0x1C0 + 0x0]; // Size of save data in bytes compressCode = (exheader[0xD] & 1) != 0; stackSize = *(u32*)&exheader[0x1C]; @@ -201,13 +223,20 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn Helpers::panic("Second code file in a single NCCH partition. What should this do?\n"); } + // All files in ExeFS use the same IV, though .code uses the secondary key for decryption + // whereas .icon/.banner use the primary key. + FSInfo info = exeFS; + if (encrypted && secondaryKey.has_value() && info.encryptionInfo.has_value()) { + info.encryptionInfo->normalKey = *secondaryKey; + } + if (compressCode) { std::vector tmp; tmp.resize(fileSize); // A file offset of 0 means our file is located right after the ExeFS header // So in the ROM, files are located at (file offset + exeFS offset + exeFS header size) - readFromFile(file, exeFS, tmp.data(), fileOffset + exeFSHeaderSize, fileSize); + readFromFile(file, info, tmp.data(), fileOffset + exeFSHeaderSize, fileSize); // Decompress .code file from the tmp vector to the "code" vector if (!CartLZ77::decompress(codeFile, tmp)) { @@ -216,7 +245,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn } } else { codeFile.resize(fileSize); - readFromFile(file, exeFS, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize); + readFromFile(file, info, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize); } } else if (std::strcmp(name, "icon") == 0) { // Parse icon file to extract region info and more in the future (logo, etc) @@ -295,6 +324,7 @@ std::pair NCCH::getPrimaryKey(Crypto::AESEngine &aesEngine if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } @@ -316,6 +346,7 @@ std::pair NCCH::getSecondaryKey(Crypto::AESEngine &aesEngi if (encrypted) { if (fixedCryptoKey) { + result.fill(0); return {true, result}; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2ff196fb..79a896f6 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -8,6 +8,7 @@ #include "config_mem.hpp" #include "kernel/fcram.hpp" #include "resource_limits.hpp" +#include "services/fonts.hpp" #include "services/ptm.hpp" CMRC_DECLARE(ConsoleFonts); @@ -47,7 +48,7 @@ void Memory::reset() { if (e.handle == KernelHandles::FontSharedMemHandle) { // Read font size from the cmrc filesystem the font is stored in auto fonts = cmrc::ConsoleFonts::get_filesystem(); - e.size = fonts.open("CitraSharedFontUSRelocated.bin").size(); + e.size = fonts.open("SharedFontReplacement.bin").size(); } e.mapped = false; @@ -578,10 +579,13 @@ Regions Memory::getConsoleRegion() { return region; } -void Memory::copySharedFont(u8* pointer) { +void Memory::copySharedFont(u8* pointer, u32 vaddr) { auto fonts = cmrc::ConsoleFonts::get_filesystem(); - auto font = fonts.open("CitraSharedFontUSRelocated.bin"); + auto font = fonts.open("SharedFontReplacement.bin"); std::memcpy(pointer, font.begin(), font.size()); + + // Relocate shared font to the address it's being loaded to + HLE::Fonts::relocateSharedFont(pointer, vaddr); } std::optional Memory::getProgramID() { diff --git a/src/core/renderer_gl/etc1.cpp b/src/core/renderer_gl/etc1.cpp index 8aefd622..0b4ed1a5 100644 --- a/src/core/renderer_gl/etc1.cpp +++ b/src/core/renderer_gl/etc1.cpp @@ -12,8 +12,9 @@ static constexpr u32 signExtend3To32(u32 val) { u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { // Pixel offset of the 8x8 tile based on u, v and the width of the texture u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) + if (!hasAlpha) { offs >>= 1; + } // In-tile offsets for u/v u &= 7; diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index d2eec0d5..785cac41 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -5,9 +5,20 @@ void GLStateManager::resetBlend() { logicOpEnabled = false; logicOp = GL_COPY; + blendEquationRGB = GL_FUNC_ADD; + blendEquationAlpha = GL_FUNC_ADD; + + blendFuncSourceRGB = GL_SRC_COLOR; + blendFuncDestRGB = GL_DST_COLOR; + blendFuncSourceAlpha = GL_SRC_ALPHA; + blendFuncDestAlpha = GL_DST_ALPHA; + OpenGL::disableBlend(); OpenGL::disableLogicOp(); OpenGL::setLogicOp(GL_COPY); + + glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha); + glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha); } void GLStateManager::resetClearing() { @@ -61,9 +72,9 @@ void GLStateManager::resetVAO() { glBindVertexArray(0); } -void GLStateManager::resetVBO() { - boundVBO = 0; - glBindBuffer(GL_ARRAY_BUFFER, 0); +void GLStateManager::resetBuffers() { + boundUBO = 0; + glBindBuffer(GL_UNIFORM_BUFFER, 0); } void GLStateManager::resetProgram() { @@ -79,7 +90,7 @@ void GLStateManager::reset() { resetDepth(); resetVAO(); - resetVBO(); + resetBuffers(); resetProgram(); resetScissor(); resetStencil(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index a11a6ffa..90b8f910 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -2,11 +2,16 @@ #include +#include #include #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" +#include "PICA/pica_frag_uniforms.hpp" +#include "PICA/pica_simd.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_decompiler.hpp" +#include "config.hpp" #include "math_util.hpp" CMRC_DECLARE(RendererGL); @@ -22,6 +27,8 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); + shaderCache.clear(); + // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; colourBufferFormat = PICA::ColorFmt::RGBA8; @@ -38,12 +45,16 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } + +#ifdef USING_GLES + fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL); +#endif } void RendererGL::initGraphicsContextInternal() { @@ -57,24 +68,7 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex); OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment); triangleProgram.create({vert, frag}); - gl.useProgram(triangleProgram); - - textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); - - depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs"); - - // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); - glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); + initUbershader(triangleProgram); auto displayVertexShaderSource = gl_resources.open("opengl_display.vert"); auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag"); @@ -86,35 +80,56 @@ void RendererGL::initGraphicsContextInternal() { gl.useProgram(displayProgram); glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object - vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW); - gl.bindVBO(vbo); - vao.create(); - gl.bindVAO(vao); + // Create stream buffers for vertex, index and uniform buffers + static constexpr usize hwIndexBufferSize = 2_MB; + static constexpr usize hwVertexBufferSize = 16_MB; + + hwIndexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, hwIndexBufferSize); + hwVertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, hwVertexBufferSize); + + // Allocate memory for the shadergen fragment uniform UBO + glGenBuffers(1, &shadergenFragmentUBO); + gl.bindUBO(shadergenFragmentUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); + + // Allocate memory for the accelerated vertex shader uniform UBO + glGenBuffers(1, &hwShaderUniformUBO); + gl.bindUBO(hwShaderUniformUBO); + glBufferData(GL_UNIFORM_BUFFER, PICAShader::totalUniformSize(), nullptr, GL_DYNAMIC_DRAW); + + vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize * 2, GL_STREAM_DRAW); + vbo.bind(); + // Initialize the VAO used when not using hw shaders + defaultVAO.create(); + gl.bindVAO(defaultVAO); // Position (x, y, z, w) attributes - vao.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); - vao.enableAttribute(0); + defaultVAO.setAttributeFloat(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions)); + defaultVAO.enableAttribute(0); // Quaternion attribute - vao.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); - vao.enableAttribute(1); + defaultVAO.setAttributeFloat(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion)); + defaultVAO.enableAttribute(1); // Colour attribute - vao.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); - vao.enableAttribute(2); + defaultVAO.setAttributeFloat(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour)); + defaultVAO.enableAttribute(2); // UV 0 attribute - vao.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); - vao.enableAttribute(3); + defaultVAO.setAttributeFloat(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0)); + defaultVAO.enableAttribute(3); // UV 1 attribute - vao.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); - vao.enableAttribute(4); + defaultVAO.setAttributeFloat(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1)); + defaultVAO.enableAttribute(4); // UV 0 W-component attribute - vao.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); - vao.enableAttribute(5); + defaultVAO.setAttributeFloat(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w)); + defaultVAO.enableAttribute(5); // View - vao.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); - vao.enableAttribute(6); + defaultVAO.setAttributeFloat(6, 3, sizeof(Vertex), offsetof(Vertex, s.view)); + defaultVAO.enableAttribute(6); // UV 2 attribute - vao.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); - vao.enableAttribute(7); + defaultVAO.setAttributeFloat(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2)); + defaultVAO.enableAttribute(7); + + // Initialize the VAO used for hw shaders + hwShaderVAO.create(); dummyVBO.create(); dummyVAO.create(); @@ -124,7 +139,11 @@ void RendererGL::initGraphicsContextInternal() { const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall - glGenTextures(1, &lightLUTTextureArray); + // 24 rows for light, 1 for fog + LUTTexture.create(256, Lights::LUT_Count + 1, GL_RG32F); + LUTTexture.bind(); + LUTTexture.setMinFilter(OpenGL::Linear); + LUTTexture.setMagFilter(OpenGL::Linear); auto prevTexture = OpenGL::getTex2D(); @@ -165,7 +184,21 @@ void RendererGL::initGraphicsContextInternal() { OpenGL::clearColor(); OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]); + // Initialize fixed attributes + for (int i = 0; i < fixedAttrValues.size(); i++) { + fixedAttrValues[i] = {0.f, 0.f, 0.f, 0.f}; + glVertexAttrib4f(i, 0.0, 0.0, 0.0, 0.0); + } + reset(); + + // Populate our driver info structure + driverInfo.supportsExtFbFetch = (GLAD_GL_EXT_shader_framebuffer_fetch != 0); + driverInfo.supportsArmFbFetch = (GLAD_GL_ARM_shader_framebuffer_fetch != 0); + + // Initialize the default vertex shader used with shadergen + std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader(); + defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex); } // The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend) @@ -236,8 +269,8 @@ void RendererGL::setupBlending() { OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f); // Translate equations and funcs to their GL equivalents and set them - glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); - glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); + gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); + gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); } } @@ -289,10 +322,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - -void RendererGL::setupTextureEnvState() { +void RendererGL::setupUbershaderTexEnv() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, @@ -314,11 +345,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -357,26 +388,49 @@ void RendererGL::bindTexturesToSlots() { } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + LUTTexture.bind(); glActiveTexture(GL_TEXTURE0); } void RendererGL::updateLightingLUT() { gpu.lightingLUTDirty = false; - std::array u16_lightinglut; + std::array lightingLut; - for (int i = 0; i < gpu.lightingLUT.size(); i++) { - uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); - u16_lightinglut[i] = value * 65535 / 4095; + for (int i = 0; i < lightingLut.size(); i += 2) { + uint64_t value = gpu.lightingLUT[i >> 1] & 0xFFF; + lightingLut[i] = (float)(value << 4) / 65535.0f; } glActiveTexture(GL_TEXTURE0 + 3); - glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + LUTTexture.bind(); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, Lights::LUT_Count, GL_RG, GL_FLOAT, lightingLut.data()); + glActiveTexture(GL_TEXTURE0); +} + +void RendererGL::updateFogLUT() { + gpu.fogLUTDirty = false; + + // Fog LUT elements are of this type: + // 0-12 fixed1.1.11, Difference from next element + // 13-23 fixed0.0.11, Value + // We will store them as a 128x1 RG texture with R being the value and G being the difference + std::array fogLut; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + glActiveTexture(GL_TEXTURE0 + 3); + LUTTexture.bind(); + // The fog LUT exists at the end of the lighting LUT + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, Lights::LUT_Count, 128, 1, GL_RG, GL_FLOAT, fogLut.data()); glActiveTexture(GL_TEXTURE0); } @@ -391,9 +445,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); - gl.bindVBO(vbo); - gl.bindVAO(vao); - gl.useProgram(triangleProgram); + + // If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw + if (!usingAcceleratedShader) { + vbo.bind(); + gl.bindVAO(defaultVAO); + } gl.enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -411,35 +468,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v const int depthFunc = getBits<4, 3>(depthControl); const int colourMask = getBits<8, 4>(depthControl); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); - static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; - const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); - const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); - const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - - // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); - } - - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); - } - - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); - } - - setupTextureEnvState(); bindTexturesToSlots(); - - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, ®s[0x48]); + if (gpu.fogLUTDirty) { + updateFogLUT(); + } if (gpu.lightingLUTDirty) { updateLightingLUT(); @@ -479,15 +513,38 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v setupStencilTest(stencilEnable); - vbo.bufferVertsSub(vertices); - OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + if (!usingAcceleratedShader) { + vbo.bufferVertsSub(vertices); + OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + } else { + if (performIndexedRender) { + // When doing indexed rendering, use glDrawRangeElementsBaseVertex to issue the indexed draw + hwIndexBuffer->Bind(); + + if (glDrawRangeElementsBaseVertex != nullptr) [[likely]] { + glDrawRangeElementsBaseVertex( + primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, + hwIndexBufferOffset, -GLint(minimumIndex) + ); + } else { + // If glDrawRangeElementsBaseVertex is not available then prepareForDraw will have subtracted the base vertex from the index buffer + // for us, so just use glDrawRangeElements + glDrawRangeElements( + primitiveTopology, 0, GLint(maximumIndex - minimumIndex), GLsizei(vertices.size()), + usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, hwIndexBufferOffset + ); + } + } else { + // When doing non-indexed rendering, just use glDrawArrays + OpenGL::draw(primitiveTopology, GLsizei(vertices.size())); + } + } } void RendererGL::display() { gl.disableScissor(); gl.disableBlend(); gl.disableDepth(); - gl.disableScissor(); // This will work fine whether or not logic ops are enabled. We set logic op to copy instead of disabling to avoid state changes gl.setLogicOp(GL_COPY); gl.setColourMask(true, true, true, true); @@ -616,7 +673,15 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) { if (buffer.has_value()) { return buffer.value().get().texture; } else { - const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + const u8* startPointer = gpu.getPointerPhys(tex.location); + const usize sizeInBytes = tex.sizeInBytes(); + + if (startPointer == nullptr || (sizeInBytes > 0 && gpu.getPointerPhys(tex.location + sizeInBytes - 1) == nullptr)) [[unlikely]] { + Helpers::warn("Out-of-bounds texture fetch"); + return blankTexture; + } + + const auto textureData = std::span{startPointer, tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory Texture& newTex = textureCache.add(tex); newTex.decodeTexture(textureData); @@ -727,7 +792,8 @@ void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 if (inputWidth != 0) [[likely]] { copyHeight = (copySize / inputWidth) * 8; } else { - copyHeight = 0; + Helpers::warn("Zero-width texture copy"); + return; } // Find the source surface. @@ -778,6 +844,238 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } +OpenGL::Program& RendererGL::getSpecializedShader() { + constexpr uint vsUBOBlockBinding = 1; + constexpr uint fsUBOBlockBinding = 2; + + PICA::FragmentConfig fsConfig(regs); + // If we're not on GLES, ignore the logic op configuration and don't generate redundant shaders for it, since we use hw logic ops +#ifndef USING_GLES + fsConfig.outConfig.logicOpMode = PICA::LogicOpMode(0); +#endif + + OpenGL::Shader& fragShader = shaderCache.fragmentShaderCache[fsConfig]; + if (!fragShader.exists()) { + std::string fs = fragShaderGen.generate(fsConfig); + fragShader.create({fs.c_str(), fs.size()}, OpenGL::Fragment); + } + + // Get the handle of the current vertex shader + OpenGL::Shader& vertexShader = usingAcceleratedShader ? *generatedVertexShader : defaultShadergenVs; + // And form the key for looking up a shader program + const u64 programKey = (u64(vertexShader.handle()) << 32) | u64(fragShader.handle()); + + CachedProgram& programEntry = shaderCache.programCache[programKey]; + OpenGL::Program& program = programEntry.program; + + if (!program.exists()) { + program.create({vertexShader, fragShader}); + gl.useProgram(program); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); + + // Set up the binding for our UBOs. Sadly we can't specify it in the shader like normal people, + // As it's an OpenGL 4.2 feature that MacOS doesn't support... + uint fsUBOIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); + glUniformBlockBinding(program.handle(), fsUBOIndex, fsUBOBlockBinding); + + if (usingAcceleratedShader) { + uint vertexUBOIndex = glGetUniformBlockIndex(program.handle(), "PICAShaderUniforms"); + glUniformBlockBinding(program.handle(), vertexUBOIndex, vsUBOBlockBinding); + } + } + glBindBufferBase(GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO); + if (usingAcceleratedShader) { + glBindBufferBase(GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO); + } + + // Upload uniform data to our shader's UBO + PICA::FragmentUniforms uniforms; + uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]); + + // Set up the texenv buffer color + const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f; + uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f; + uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; + uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + + uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + + if (regs[InternalRegs::ClipEnable] & 1) { + uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32(); + uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32(); + uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32(); + uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32(); + } + + // Set up the constant color for the 6 TEV stages + for (int i = 0; i < 6; i++) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + auto& vec = uniforms.constantColors[i]; + u32 base = ioBases[i]; + u32 color = regs[base + 3]; + + vec[0] = float(color & 0xFF) / 255.0f; + vec[1] = float((color >> 8) & 0xFF) / 255.0f; + vec[2] = float((color >> 16) & 0xFF) / 255.0f; + vec[3] = float((color >> 24) & 0xFF) / 255.0f; + } + + uniforms.fogColor = regs[PICA::InternalRegs::FogColor]; + + // Append lighting uniforms + if (fsConfig.lighting.enable) { + uniforms.globalAmbientLight = regs[InternalRegs::LightGlobalAmbient]; + for (int i = 0; i < 8; i++) { + auto& light = uniforms.lightUniforms[i]; + const u32 specular0 = regs[InternalRegs::Light0Specular0 + i * 0x10]; + const u32 specular1 = regs[InternalRegs::Light0Specular1 + i * 0x10]; + const u32 diffuse = regs[InternalRegs::Light0Diffuse + i * 0x10]; + const u32 ambient = regs[InternalRegs::Light0Ambient + i * 0x10]; + const u32 lightXY = regs[InternalRegs::Light0XY + i * 0x10]; + const u32 lightZ = regs[InternalRegs::Light0Z + i * 0x10]; + + const u32 spotlightXY = regs[InternalRegs::Light0SpotlightXY + i * 0x10]; + const u32 spotlightZ = regs[InternalRegs::Light0SpotlightZ + i * 0x10]; + const u32 attenuationBias = regs[InternalRegs::Light0AttenuationBias + i * 0x10]; + const u32 attenuationScale = regs[InternalRegs::Light0AttenuationScale + i * 0x10]; + +#define lightColorToVec3(value) \ + { \ + float(Helpers::getBits<20, 8>(value)) / 255.0f, \ + float(Helpers::getBits<10, 8>(value)) / 255.0f, \ + float(Helpers::getBits<0, 8>(value)) / 255.0f, \ + } + light.specular0 = lightColorToVec3(specular0); + light.specular1 = lightColorToVec3(specular1); + light.diffuse = lightColorToVec3(diffuse); + light.ambient = lightColorToVec3(ambient); + light.position[0] = Floats::f16::fromRaw(u16(lightXY)).toFloat32(); + light.position[1] = Floats::f16::fromRaw(u16(lightXY >> 16)).toFloat32(); + light.position[2] = Floats::f16::fromRaw(u16(lightZ)).toFloat32(); + + // Fixed point 1.11.1 to float, without negation + light.spotlightDirection[0] = float(s32(spotlightXY & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[1] = float(s32((spotlightXY >> 16) & 0x1FFF) << 19 >> 19) / 2047.0; + light.spotlightDirection[2] = float(s32(spotlightZ & 0x1FFF) << 19 >> 19) / 2047.0; + + light.distanceAttenuationBias = Floats::f20::fromRaw(attenuationBias & 0xFFFFF).toFloat32(); + light.distanceAttenuationScale = Floats::f20::fromRaw(attenuationScale & 0xFFFFF).toFloat32(); +#undef lightColorToVec3 + } + } + + gl.bindUBO(shadergenFragmentUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + + return program; +} + +bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { + // First we figure out if we will be using an ubershader + bool usingUbershader = emulatorConfig->useUbershaders; + if (usingUbershader) { + const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0; + const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1; + + // Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using moret han N lights via shadergen + // This way we generate fewer shaders overall than with full shadergen, but don't tank performance + if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) { + usingUbershader = false; + } + } + + // Then we figure out if we will use hw accelerated shaders, and try to fetch our shader + // TODO: Ubershader support for accelerated shaders + usingAcceleratedShader = emulatorConfig->accelerateShaders && !usingUbershader && accel != nullptr && accel->canBeAccelerated; + + if (usingAcceleratedShader) { + PICA::VertConfig vertexConfig(shaderUnit.vs, regs, usingUbershader); + + std::optional& shader = shaderCache.vertexShaderCache[vertexConfig]; + // If the optional is false, we have never tried to recompile the shader before. Try to recompile it and see if it works. + if (!shader.has_value()) { + // Initialize shader to a "null" shader (handle == 0) + shader = OpenGL::Shader(); + + std::string picaShaderSource = PICA::ShaderGen::decompileShader( + shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, + Helpers::isAndroid() ? PICA::ShaderGen::API::GLES : PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL + ); + + // Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload + // it to the GPU + if (!picaShaderSource.empty()) { + std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader); + shader->create({vertexShaderSource}, OpenGL::Vertex); + } + } + + // Shader generation did not work out, so set usingAcceleratedShader to false + if (!shader->exists()) { + usingAcceleratedShader = false; + } else { + generatedVertexShader = &(*shader); + gl.bindUBO(hwShaderUniformUBO); + + if (shaderUnit.vs.uniformsDirty) { + shaderUnit.vs.uniformsDirty = false; + glBufferSubData(GL_UNIFORM_BUFFER, 0, PICAShader::totalUniformSize(), shaderUnit.vs.getUniformPointer()); + } + + performIndexedRender = accel->indexed; + minimumIndex = GLsizei(accel->minimumIndex); + maximumIndex = GLsizei(accel->maximumIndex); + + // Upload vertex data and index buffer data to our GPU + accelerateVertexUpload(shaderUnit, accel); + } + } + + if (!usingUbershader) { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } else { // Bind ubershader & load ubershader uniforms + gl.useProgram(triangleProgram); + + const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } + + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } + + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } + + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]); + setupUbershaderTexEnv(); + } + + return usingAcceleratedShader; +} + void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; @@ -791,7 +1089,7 @@ void RendererGL::screenshot(const std::string& name) { // Flip the image vertically for (int y = 0; y < height; y++) { - memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); + std::memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4); // Swap R and B channels for (int x = 0; x < width; x++) { std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]); @@ -808,8 +1106,148 @@ void RendererGL::deinitGraphicsContext() { textureCache.reset(); depthBufferCache.reset(); colourBufferCache.reset(); + shaderCache.clear(); // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory printf("RendererGL::DeinitGraphicsContext called\n"); +} + +std::string RendererGL::getUbershader() { + auto gl_resources = cmrc::RendererGL::get_filesystem(); + auto fragmentShader = gl_resources.open("opengl_fragment_shader.frag"); + + return std::string(fragmentShader.begin(), fragmentShader.end()); +} + +void RendererGL::setUbershader(const std::string& shader) { + auto gl_resources = cmrc::RendererGL::get_filesystem(); + auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert"); + + OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex); + OpenGL::Shader frag(shader, OpenGL::Fragment); + triangleProgram.create({vert, frag}); + + initUbershader(triangleProgram); + + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); +} + +void RendererGL::initUbershader(OpenGL::Program& program) { + gl.useProgram(program); + + ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); + + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2 and the LUTs go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3); +} + +void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { + u32 buffer = 0; // Vertex buffer index for non-fixed attributes + u32 attrCount = 0; + + const u32 totalAttribCount = accel->totalAttribCount; + + static constexpr GLenum attributeFormats[4] = { + GL_BYTE, // 0: Signed byte + GL_UNSIGNED_BYTE, // 1: Unsigned byte + GL_SHORT, // 2: Short + GL_FLOAT, // 3: Float + }; + + const u32 vertexCount = accel->maximumIndex - accel->minimumIndex + 1; + + // Update index buffer if necessary + if (accel->indexed) { + usingShortIndices = accel->useShortIndices; + const usize indexBufferSize = regs[PICA::InternalRegs::VertexCountReg] * (usingShortIndices ? sizeof(u16) : sizeof(u8)); + + hwIndexBuffer->Bind(); + auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize); + hwIndexBufferOffset = reinterpret_cast(usize(indexBufferRes.buffer_offset)); + + std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize); + // If we don't have glDrawRangeElementsBaseVertex, we must subtract the base index value from our index buffer manually + if (glDrawRangeElementsBaseVertex == nullptr) [[unlikely]] { + const u32 indexCount = regs[PICA::InternalRegs::VertexCountReg]; + usingShortIndices ? PICA::IndexBuffer::subtractBaseIndex((u8*)indexBufferRes.pointer, indexCount, accel->minimumIndex) + : PICA::IndexBuffer::subtractBaseIndex((u8*)indexBufferRes.pointer, indexCount, accel->minimumIndex); + } + + hwIndexBuffer->Unmap(indexBufferSize); + } + + hwVertexBuffer->Bind(); + auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize); + u8* vertexData = static_cast(vertexBufferRes.pointer); + const u32 vertexBufferOffset = vertexBufferRes.buffer_offset; + + gl.bindVAO(hwShaderVAO); + + // Enable or disable vertex attributes as needed + const u32 currentAttributeMask = accel->enabledAttributeMask; + // Use bitwise xor to calculate which attributes changed + u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask; + + while (attributeMaskDiff != 0) { + // Get index of next different attribute and turn it off + const u32 index = 31 - std::countl_zero(attributeMaskDiff); + const u32 mask = 1u << index; + attributeMaskDiff ^= mask; + + if ((currentAttributeMask & mask) != 0) { + // Attribute was disabled and is now enabled + hwShaderVAO.enableAttribute(index); + } else { + // Attribute was enabled and is now disabled + hwShaderVAO.disableAttribute(index); + } + } + + previousAttributeMask = currentAttributeMask; + + // Upload the data for each (enabled) attribute loader into our vertex buffer + for (int i = 0; i < accel->totalLoaderCount; i++) { + auto& loader = accel->loaders[i]; + + std::memcpy(vertexData, loader.data, loader.size); + vertexData += loader.size; + } + + hwVertexBuffer->Unmap(accel->vertexDataSize); + + // Iterate over the 16 PICA input registers and configure how they should be fetched. + for (int i = 0; i < 16; i++) { + const auto& attrib = accel->attributeInfo[i]; + const u32 attributeMask = 1u << i; + + if (accel->fixedAttributes & attributeMask) { + auto& attrValue = fixedAttrValues[i]; + // This is a fixed attribute, so set its fixed value, but only if it actually needs to be updated + if (attrValue[0] != attrib.fixedValue[0] || attrValue[1] != attrib.fixedValue[1] || attrValue[2] != attrib.fixedValue[2] || + attrValue[3] != attrib.fixedValue[3]) { + std::memcpy(attrValue.data(), attrib.fixedValue.data(), sizeof(attrib.fixedValue)); + glVertexAttrib4f(i, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]); + } + } else if (accel->enabledAttributeMask & attributeMask) { + glVertexAttribPointer( + i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride, + reinterpret_cast(vertexBufferOffset + attrib.offset) + ); + } + } } \ No newline at end of file diff --git a/src/core/renderer_mtl/metal_cpp_impl.cpp b/src/core/renderer_mtl/metal_cpp_impl.cpp new file mode 100644 index 00000000..7fa7137b --- /dev/null +++ b/src/core/renderer_mtl/metal_cpp_impl.cpp @@ -0,0 +1,6 @@ +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION +#include +#include +#include diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp new file mode 100644 index 00000000..420a60ca --- /dev/null +++ b/src/core/renderer_mtl/mtl_etc1.cpp @@ -0,0 +1,116 @@ +#include + +#include "colour.hpp" +#include "renderer_mtl/mtl_texture.hpp" +#include "renderer_mtl/renderer_mtl.hpp" + + +using namespace Helpers; + +namespace Metal { + static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); + } + + u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); + if (!hasAlpha) { + offs >>= 1; + } + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + return decodeETC(alpha, u, v, colourData); + } + + u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_lut_texture.cpp b/src/core/renderer_mtl/mtl_lut_texture.cpp new file mode 100644 index 00000000..8486a50c --- /dev/null +++ b/src/core/renderer_mtl/mtl_lut_texture.cpp @@ -0,0 +1,27 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +namespace Metal { + static constexpr u32 LAYER_COUNT = 1024; + + LutTexture::LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name) { + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); + desc->setTextureType(type); + desc->setPixelFormat(pixelFormat); + desc->setWidth(width); + desc->setHeight(height); + desc->setArrayLength(LAYER_COUNT); + desc->setUsage(MTL::TextureUsageShaderRead /* | MTL::TextureUsageShaderWrite*/); + desc->setStorageMode(MTL::StorageModeShared); + + texture = device->newTexture(desc); + texture->setLabel(toNSString(name)); + desc->release(); + } + + LutTexture::~LutTexture() { texture->release(); } + + u32 LutTexture::getNextIndex() { + currentIndex = (currentIndex + 1) % LAYER_COUNT; + return currentIndex; + } +} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp new file mode 100644 index 00000000..149fea26 --- /dev/null +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -0,0 +1,308 @@ +#include "renderer_mtl/mtl_texture.hpp" + +#include + +#include "colour.hpp" +#include "renderer_mtl/objc_helper.hpp" + + +using namespace Helpers; + +namespace Metal { + void Texture::allocate() { + formatInfo = PICA::getPixelFormatInfo(format); + + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init(); + descriptor->setTextureType(MTL::TextureType2D); + descriptor->setPixelFormat(formatInfo.pixelFormat); + descriptor->setWidth(size.u()); + descriptor->setHeight(size.v()); + descriptor->setUsage(MTL::TextureUsageShaderRead); + descriptor->setStorageMode(MTL::StorageModeShared); // TODO: use private + staging buffers? + texture = device->newTexture(descriptor); + texture->setLabel(toNSString( + "Texture " + std::string(PICA::textureFormatToString(format)) + " " + std::to_string(size.u()) + "x" + std::to_string(size.v()) + )); + descriptor->release(); + + setNewConfig(config); + } + + // Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on + void Texture::setNewConfig(u32 cfg) { + config = cfg; + + if (sampler) { + sampler->release(); + } + + const auto magFilter = (cfg & 0x2) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto minFilter = (cfg & 0x4) != 0 ? MTL::SamplerMinMagFilterLinear : MTL::SamplerMinMagFilterNearest; + const auto wrapT = PICA::toMTLSamplerAddressMode(getBits<8, 3>(cfg)); + const auto wrapS = PICA::toMTLSamplerAddressMode(getBits<12, 3>(cfg)); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMinFilter(minFilter); + samplerDescriptor->setMagFilter(magFilter); + samplerDescriptor->setSAddressMode(wrapS); + samplerDescriptor->setTAddressMode(wrapT); + + samplerDescriptor->setLabel(toNSString("Sampler")); + sampler = device->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); + } + + void Texture::free() { + valid = false; + + if (texture) { + texture->release(); + } + if (sampler) { + sampler->release(); + } + } + + u64 Texture::sizeInBytes() { + u64 pixelCount = u64(size.x()) * u64(size.y()); + + switch (format) { + case PICA::TextureFmt::RGBA8: // 4 bytes per pixel + return pixelCount * 4; + + case PICA::TextureFmt::RGB8: // 3 bytes per pixel + return pixelCount * 3; + + case PICA::TextureFmt::RGBA5551: // 2 bytes per pixel + case PICA::TextureFmt::RGB565: + case PICA::TextureFmt::RGBA4: + case PICA::TextureFmt::RG8: + case PICA::TextureFmt::IA8: return pixelCount * 2; + + case PICA::TextureFmt::A8: // 1 byte per pixel + case PICA::TextureFmt::I8: + case PICA::TextureFmt::IA4: return pixelCount; + + case PICA::TextureFmt::I4: // 4 bits per pixel + case PICA::TextureFmt::A4: return pixelCount / 2; + + case PICA::TextureFmt::ETC1: // Compressed formats + case PICA::TextureFmt::ETC1A4: { + // Number of 4x4 tiles + const u64 tileCount = pixelCount / 16; + // Tiles are 8 bytes each on ETC1 and 16 bytes each on ETC1A4 + const u64 tileSize = format == PICA::TextureFmt::ETC1 ? 8 : 16; + return tileCount * tileSize; + } + + default: Helpers::panic("[PICA] Attempted to get size of invalid texture type"); + } + } + + // u and v are the UVs of the relevant texel + // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here + // https://en.wikipedia.org/wiki/Z-order_curve + // Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel + // The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 + // As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg + u32 Texture::mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; + + return xOffsets[u & 7] + yOffsets[v & 7]; + } + + // Get the byte offset of texel (u, v) in the texture + u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; + } + + // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte + u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; + } + + u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::A4: { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = data[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + // A8 + return alpha; + } + + case PICA::TextureFmt::A8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = data[offset]; + + // A8 + return alpha; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u16 Texture::decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RG8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = data[offset]; + const u8 r = data[offset + 1]; + + // RG8 + return (g << 8) | r; + } + + case PICA::TextureFmt::RGBA4: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + // ABGR4 + return (r << 12) | (g << 8) | (b << 4) | alpha; + } + + case PICA::TextureFmt::RGBA5551: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 0xff : 0; + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + // BGR5A1 + return (alpha << 15) | (r << 10) | (g << 5) | b; + } + + case PICA::TextureFmt::RGB565: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8); + + const u8 b = getBits<0, 5, u8>(texel); + const u8 g = getBits<5, 6, u8>(texel); + const u8 r = getBits<11, 5, u8>(texel); + + // B5G6R5 + return (r << 11) | (g << 5) | b; + } + + case PICA::TextureFmt::IA4: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = data[offset]; + const u8 alpha = texel & 0xf; + const u8 intensity = texel >> 4; + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | alpha; + } + + case PICA::TextureFmt::I4: { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = data[offset] >> ((u % 2) ? 4 : 0); + intensity = getBits<0, 4>(intensity); + + // ABGR4 + return (intensity << 12) | (intensity << 8) | (intensity << 4) | 0xff; + } + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + u32 Texture::decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { + switch (fmt) { + case PICA::TextureFmt::RGB8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = data[offset]; + const u8 g = data[offset + 1]; + const u8 r = data[offset + 2]; + + // RGBA8 + return (0xff << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::RGBA8: { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = data[offset]; + const u8 b = data[offset + 1]; + const u8 g = data[offset + 2]; + const u8 r = data[offset + 3]; + + // RGBA8 + return (alpha << 24) | (b << 16) | (g << 8) | r; + } + + case PICA::TextureFmt::I8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = data[offset]; + + // RGBA8 + return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::IA8: { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = data[offset]; + const u8 intensity = data[offset + 1]; + + // RGBA8 + return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity; + } + + case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data); + case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data); + + default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); + } + } + + void Texture::decodeTexture(std::span data) { + std::vector decoded; + decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); + + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + if (formatInfo.bytesPerTexel == 1) { + u8 texel = decodeTexelU8(u, v, format, data); + decoded.push_back(texel); + } else if (formatInfo.bytesPerTexel == 2) { + u16 texel = decodeTexelU16(u, v, format, data); + decoded.push_back((texel & 0x00ff) >> 0); + decoded.push_back((texel & 0xff00) >> 8); + } else if (formatInfo.bytesPerTexel == 4) { + u32 texel = decodeTexelU32(u, v, format, data); + decoded.push_back((texel & 0x000000ff) >> 0); + decoded.push_back((texel & 0x0000ff00) >> 8); + decoded.push_back((texel & 0x00ff0000) >> 16); + decoded.push_back((texel & 0xff000000) >> 24); + } else { + Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); + } + } + } + + texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); + } +} // namespace Metal diff --git a/src/core/renderer_mtl/objc_helper.mm b/src/core/renderer_mtl/objc_helper.mm new file mode 100644 index 00000000..eeea56a0 --- /dev/null +++ b/src/core/renderer_mtl/objc_helper.mm @@ -0,0 +1,12 @@ +#include "renderer_mtl/objc_helper.hpp" + +// TODO: change the include +#import + +namespace Metal { + +dispatch_data_t createDispatchData(const void* data, size_t size) { + return dispatch_data_create(data, size, dispatch_get_global_queue(0, 0), ^{}); +} + +} // namespace Metal diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp new file mode 100644 index 00000000..df1f8b47 --- /dev/null +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -0,0 +1,824 @@ +#include "renderer_mtl/renderer_mtl.hpp" + +#include +#include + +#include "renderer_mtl/mtl_lut_texture.hpp" + +// Hack: Apple annoyingly defines a global "NO" macro which ends up conflicting with our own code... +#undef NO + +#include "PICA/gpu.hpp" +#include "SDL_metal.h" + +using namespace PICA; + +CMRC_DECLARE(RendererMTL); + +static constexpr u16 LIGHTING_LUT_TEXTURE_WIDTH = 256; +static constexpr u32 FOG_LUT_TEXTURE_WIDTH = 128; +// Bind the vertex buffer to binding 30 so that it doesn't occupy the lower indices +static constexpr uint VERTEX_BUFFER_BINDING_INDEX = 30; + +// HACK: redefinition... +PICA::ColorFmt ToColorFormat(u32 format) { + switch (format) { + case 2: return PICA::ColorFmt::RGB565; + case 3: return PICA::ColorFmt::RGBA5551; + default: return static_cast(format); + } +} + +MTL::Library* loadLibrary(MTL::Device* device, const cmrc::file& shaderSource) { + // MTL::CompileOptions* compileOptions = MTL::CompileOptions::alloc()->init(); + NS::Error* error = nullptr; + MTL::Library* library = device->newLibrary(Metal::createDispatchData(shaderSource.begin(), shaderSource.size()), &error); + // MTL::Library* library = device->newLibrary(NS::String::string(source.c_str(), NS::ASCIIStringEncoding), compileOptions, &error); + if (error) { + Helpers::panic("Error loading shaders: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + + return library; +} + +RendererMTL::RendererMTL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) + : Renderer(gpu, internalRegs, externalRegs) {} + +RendererMTL::~RendererMTL() {} + +void RendererMTL::reset() { + vertexBufferCache.reset(); + depthStencilCache.reset(); + drawPipelineCache.reset(); + blitPipelineCache.reset(); + textureCache.reset(); + depthStencilRenderTargetCache.reset(); + colorRenderTargetCache.reset(); +} + +void RendererMTL::display() { + CA::MetalDrawable* drawable = metalLayer->nextDrawable(); + if (!drawable) { + return; + } + + using namespace PICA::ExternalRegs; + + // Top screen + const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1; + const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr]; + auto topScreen = colorRenderTargetCache.findFromAddress(topScreenAddr); + + if (topScreen) { + clearColor(nullptr, topScreen->get().texture); + } + + // Bottom screen + const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1; + const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr]; + auto bottomScreen = colorRenderTargetCache.findFromAddress(bottomScreenAddr); + + if (bottomScreen) { + clearColor(nullptr, bottomScreen->get().texture); + } + + // Draw + commandBuffer->pushDebugGroup(toNSString("Display")); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + MTL::RenderPassColorAttachmentDescriptor* colorAttachment = renderPassDescriptor->colorAttachments()->object(0); + colorAttachment->setTexture(drawable->texture()); + colorAttachment->setLoadAction(MTL::LoadActionClear); + colorAttachment->setClearColor(MTL::ClearColor{0.0f, 0.0f, 0.0f, 1.0f}); + colorAttachment->setStoreAction(MTL::StoreActionStore); + + nextRenderPassName = "Display"; + beginRenderPassIfNeeded(renderPassDescriptor, false, drawable->texture()); + renderCommandEncoder->setRenderPipelineState(displayPipeline); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, 0); + + // Top screen + if (topScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{0, 0, 400, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(topScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + // Bottom screen + if (bottomScreen) { + renderCommandEncoder->setViewport(MTL::Viewport{40, 240, 320, 240, 0.0f, 1.0f}); + renderCommandEncoder->setFragmentTexture(bottomScreen->get().texture, 0); + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } + + endRenderPass(); + + commandBuffer->presentDrawable(drawable); + commandBuffer->popDebugGroup(); + commitCommandBuffer(); + + // Inform the vertex buffer cache that the frame ended + vertexBufferCache.endFrame(); + + // Release + drawable->release(); +} + +void RendererMTL::initGraphicsContext(SDL_Window* window) { + // TODO: what should be the type of the view? + void* view = SDL_Metal_CreateView(window); + metalLayer = (CA::MetalLayer*)SDL_Metal_GetLayer(view); + device = MTL::CreateSystemDefaultDevice(); + metalLayer->setDevice(device); + commandQueue = device->newCommandQueue(); + + // Textures + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setTextureType(MTL::TextureType2D); + textureDescriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + textureDescriptor->setWidth(1); + textureDescriptor->setHeight(1); + textureDescriptor->setStorageMode(MTL::StorageModePrivate); + textureDescriptor->setUsage(MTL::TextureUsageShaderRead); + + nullTexture = device->newTexture(textureDescriptor); + nullTexture->setLabel(toNSString("Null texture")); + textureDescriptor->release(); + + // Samplers + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setLabel(toNSString("Sampler (nearest)")); + nearestSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setLabel(toNSString("Sampler (linear)")); + linearSampler = device->newSamplerState(samplerDescriptor); + + samplerDescriptor->release(); + + lutLightingTexture = new Metal::LutTexture( + device, MTL::TextureType2DArray, MTL::PixelFormatR16Unorm, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count, "Lighting LUT texture" + ); + lutFogTexture = new Metal::LutTexture(device, MTL::TextureType1DArray, MTL::PixelFormatRG32Float, FOG_LUT_TEXTURE_WIDTH, 1, "Fog LUT texture"); + + // -------- Pipelines -------- + + // Load shaders + auto mtlResources = cmrc::RendererMTL::get_filesystem(); + library = loadLibrary(device, mtlResources.open("metal_shaders.metallib")); + MTL::Library* blitLibrary = loadLibrary(device, mtlResources.open("metal_blit.metallib")); + // MTL::Library* copyToLutTextureLibrary = loadLibrary(device, mtlResources.open("metal_copy_to_lut_texture.metallib")); + + // Display + MTL::Function* vertexDisplayFunction = library->newFunction(NS::String::string("vertexDisplay", NS::ASCIIStringEncoding)); + MTL::Function* fragmentDisplayFunction = library->newFunction(NS::String::string("fragmentDisplay", NS::ASCIIStringEncoding)); + + MTL::RenderPipelineDescriptor* displayPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + displayPipelineDescriptor->setVertexFunction(vertexDisplayFunction); + displayPipelineDescriptor->setFragmentFunction(fragmentDisplayFunction); + auto* displayColorAttachment = displayPipelineDescriptor->colorAttachments()->object(0); + displayColorAttachment->setPixelFormat(MTL::PixelFormat::PixelFormatBGRA8Unorm); + + NS::Error* error = nullptr; + displayPipelineDescriptor->setLabel(toNSString("Display pipeline")); + displayPipeline = device->newRenderPipelineState(displayPipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating display pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + displayPipelineDescriptor->release(); + vertexDisplayFunction->release(); + fragmentDisplayFunction->release(); + + // Blit + MTL::Function* vertexBlitFunction = blitLibrary->newFunction(NS::String::string("vertexBlit", NS::ASCIIStringEncoding)); + MTL::Function* fragmentBlitFunction = blitLibrary->newFunction(NS::String::string("fragmentBlit", NS::ASCIIStringEncoding)); + + blitPipelineCache.set(device, vertexBlitFunction, fragmentBlitFunction); + + // Draw + MTL::Function* vertexDrawFunction = library->newFunction(NS::String::string("vertexDraw", NS::ASCIIStringEncoding)); + + // -------- Vertex descriptor -------- + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + + // Position + MTL::VertexAttributeDescriptor* positionAttribute = vertexDescriptor->attributes()->object(0); + positionAttribute->setFormat(MTL::VertexFormatFloat4); + positionAttribute->setOffset(offsetof(Vertex, s.positions)); + positionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Quaternion + MTL::VertexAttributeDescriptor* quaternionAttribute = vertexDescriptor->attributes()->object(1); + quaternionAttribute->setFormat(MTL::VertexFormatFloat4); + quaternionAttribute->setOffset(offsetof(Vertex, s.quaternion)); + quaternionAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Color + MTL::VertexAttributeDescriptor* colorAttribute = vertexDescriptor->attributes()->object(2); + colorAttribute->setFormat(MTL::VertexFormatFloat4); + colorAttribute->setOffset(offsetof(Vertex, s.colour)); + colorAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 + MTL::VertexAttributeDescriptor* texCoord0Attribute = vertexDescriptor->attributes()->object(3); + texCoord0Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord0Attribute->setOffset(offsetof(Vertex, s.texcoord0)); + texCoord0Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 1 + MTL::VertexAttributeDescriptor* texCoord1Attribute = vertexDescriptor->attributes()->object(4); + texCoord1Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord1Attribute->setOffset(offsetof(Vertex, s.texcoord1)); + texCoord1Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 0 W + MTL::VertexAttributeDescriptor* texCoord0WAttribute = vertexDescriptor->attributes()->object(5); + texCoord0WAttribute->setFormat(MTL::VertexFormatFloat); + texCoord0WAttribute->setOffset(offsetof(Vertex, s.texcoord0_w)); + texCoord0WAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // View + MTL::VertexAttributeDescriptor* viewAttribute = vertexDescriptor->attributes()->object(6); + viewAttribute->setFormat(MTL::VertexFormatFloat3); + viewAttribute->setOffset(offsetof(Vertex, s.view)); + viewAttribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + // Texture coordinate 2 + MTL::VertexAttributeDescriptor* texCoord2Attribute = vertexDescriptor->attributes()->object(7); + texCoord2Attribute->setFormat(MTL::VertexFormatFloat2); + texCoord2Attribute->setOffset(offsetof(Vertex, s.texcoord2)); + texCoord2Attribute->setBufferIndex(VERTEX_BUFFER_BINDING_INDEX); + + MTL::VertexBufferLayoutDescriptor* vertexBufferLayout = vertexDescriptor->layouts()->object(VERTEX_BUFFER_BINDING_INDEX); + vertexBufferLayout->setStride(sizeof(Vertex)); + vertexBufferLayout->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexBufferLayout->setStepRate(1); + + drawPipelineCache.set(device, library, vertexDrawFunction, vertexDescriptor); + + // Copy to LUT texture + /* + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init(); + constants->setConstantValue(&LIGHTING_LUT_TEXTURE_WIDTH, MTL::DataTypeUShort, NS::UInteger(0)); + + error = nullptr; + MTL::Function* vertexCopyToLutTextureFunction = + copyToLutTextureLibrary->newFunction(NS::String::string("vertexCopyToLutTexture", NS::ASCIIStringEncoding), constants, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture vertex function: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + constants->release(); + + MTL::RenderPipelineDescriptor* copyToLutTexturePipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + copyToLutTexturePipelineDescriptor->setVertexFunction(vertexCopyToLutTextureFunction); + // Disable rasterization + copyToLutTexturePipelineDescriptor->setRasterizationEnabled(false); + + error = nullptr; + copyToLutTexturePipelineDescriptor->setLabel(toNSString("Copy to LUT texture pipeline")); + copyToLutTexturePipeline = device->newRenderPipelineState(copyToLutTexturePipelineDescriptor, &error); + if (error) { + Helpers::panic("Error creating copy_to_lut_texture pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding)); + } + copyToLutTexturePipelineDescriptor->release(); + vertexCopyToLutTextureFunction->release(); + */ + + // Depth stencil cache + depthStencilCache.set(device); + + // Vertex buffer cache + vertexBufferCache.set(device); + + // -------- Depth stencil state -------- + MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthStencilDescriptor->setLabel(toNSString("Default depth stencil state")); + defaultDepthStencilState = device->newDepthStencilState(depthStencilDescriptor); + depthStencilDescriptor->release(); + + blitLibrary->release(); + // copyToLutTextureLibrary->release(); +} + +void RendererMTL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { + const auto color = colorRenderTargetCache.findFromAddress(startAddress); + if (color) { + const float r = Helpers::getBits<24, 8>(value) / 255.0f; + const float g = Helpers::getBits<16, 8>(value) / 255.0f; + const float b = Helpers::getBits<8, 8>(value) / 255.0f; + const float a = (value & 0xff) / 255.0f; + + colorClearOps[color->get().texture] = {r, g, b, a}; + + return; + } + + const auto depth = depthStencilRenderTargetCache.findFromAddress(startAddress); + if (depth) { + float depthVal; + const auto format = depth->get().format; + if (format == DepthFmt::Depth16) { + depthVal = (value & 0xffff) / 65535.0f; + } else { + depthVal = (value & 0xffffff) / 16777215.0f; + } + + depthClearOps[depth->get().texture] = depthVal; + + if (format == DepthFmt::Depth24Stencil8) { + const u8 stencilVal = value >> 24; + stencilClearOps[depth->get().texture] = stencilVal; + } + + return; + } + + Helpers::warn("[RendererMTL::ClearBuffer] No buffer found!\n"); +} + +void RendererMTL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) { + const u32 inputWidth = inputSize & 0xffff; + const u32 inputHeight = inputSize >> 16; + const auto inputFormat = ToColorFormat(Helpers::getBits<8, 3>(flags)); + const auto outputFormat = ToColorFormat(Helpers::getBits<12, 3>(flags)); + const bool verticalFlip = flags & 1; + const PICA::Scaling scaling = static_cast(Helpers::getBits<24, 2>(flags)); + + u32 outputWidth = outputSize & 0xffff; + u32 outputHeight = outputSize >> 16; + + auto srcFramebuffer = getColorRenderTarget(inputAddr, inputFormat, inputWidth, outputHeight); + nextRenderPassName = "Clear before display transfer"; + clearColor(nullptr, srcFramebuffer->texture); + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight); + + if (verticalFlip) { + std::swap(srcRect.bottom, srcRect.top); + } + + // Apply scaling for the destination rectangle. + if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) { + outputWidth >>= 1; + } + + if (scaling == PICA::Scaling::XY) { + outputHeight >>= 1; + } + + auto destFramebuffer = getColorRenderTarget(outputAddr, outputFormat, outputWidth, outputHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight); + + if (inputWidth != outputWidth) { + // Helpers::warn("Strided display transfer is not handled correctly!\n"); + } + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) { + // Texture copy size is aligned to 16 byte units + const u32 copySize = totalBytes & ~0xf; + if (copySize == 0) { + Helpers::warn("TextureCopy total bytes less than 16!\n"); + return; + } + + // The width and gap are provided in 16-byte units. + const u32 inputWidth = (inputSize & 0xffff) << 4; + const u32 inputGap = (inputSize >> 16) << 4; + const u32 outputWidth = (outputSize & 0xffff) << 4; + const u32 outputGap = (outputSize >> 16) << 4; + + if (inputGap != 0 || outputGap != 0) { + // Helpers::warn("Strided texture copy\n"); + } + + if (inputWidth != outputWidth) { + Helpers::warn("Input width does not match output width, cannot accelerate texture copy!"); + return; + } + + // Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine. + // Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or + // the width multiplied by eight (because tiles are stored linearly in memory). + // To properly accelerate this we must examine each surface individually. For now we assume the most common case + // of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting + // that surface is not correct. + + // We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it + // by eight * sizePerPixel(RGBA8) to convert it to a useable width. + const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8); + const u32 copyStride = (inputWidth + inputGap) / (8 * bpp); + const u32 copyWidth = inputWidth / (8 * bpp); + + // inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region + // in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result + // is the number of vertical tiles so multiply that by eight to get the actual copy height. + u32 copyHeight; + if (inputWidth != 0) [[likely]] { + copyHeight = (copySize / inputWidth) * 8; + } else { + copyHeight = 0; + } + + // Find the source surface. + auto srcFramebuffer = getColorRenderTarget(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false); + if (!srcFramebuffer) { + Helpers::warn("RendererMTL::TextureCopy failed to locate src framebuffer!\n"); + return; + } + nextRenderPassName = "Clear before texture copy"; + clearColor(nullptr, srcFramebuffer->texture); + + Math::Rect srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight); + + // Assume the destination surface has the same format. Unless the surfaces have the same block width, + // texture copy does not make sense. + auto destFramebuffer = getColorRenderTarget(outputAddr, srcFramebuffer->format, copyWidth, copyHeight); + // TODO: clear if not blitting to the whole framebuffer + Math::Rect destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight); + + textureCopyImpl(*srcFramebuffer, *destFramebuffer, srcRect, destRect); +} + +void RendererMTL::drawVertices(PICA::PrimType primType, std::span vertices) { + // Color + auto colorRenderTarget = getColorRenderTarget(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]); + + // Depth stencil + const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask]; + const bool depthStencilWrite = regs[PICA::InternalRegs::DepthBufferWrite]; + const bool depthEnable = depthControl & 0x1; + const bool depthWriteEnable = Helpers::getBit<12>(depthControl); + const u8 depthFunc = Helpers::getBits<4, 3>(depthControl); + const u8 colorMask = Helpers::getBits<8, 4>(depthControl); + + Metal::DepthStencilHash depthStencilHash{false, 1}; + depthStencilHash.stencilConfig = regs[PICA::InternalRegs::StencilTest]; + depthStencilHash.stencilOpConfig = regs[PICA::InternalRegs::StencilOp]; + const bool stencilEnable = Helpers::getBit<0>(depthStencilHash.stencilConfig); + + std::optional depthStencilRenderTarget = std::nullopt; + if (depthEnable) { + depthStencilHash.depthStencilWrite = depthWriteEnable && depthStencilWrite; + depthStencilHash.depthFunc = depthFunc; + depthStencilRenderTarget = getDepthRenderTarget(); + } else { + if (depthWriteEnable) { + depthStencilHash.depthStencilWrite = true; + depthStencilRenderTarget = getDepthRenderTarget(); + } else if (stencilEnable) { + depthStencilRenderTarget = getDepthRenderTarget(); + } + } + + // Depth uniforms + struct { + float depthScale; + float depthOffset; + bool depthMapEnable; + } depthUniforms; + depthUniforms.depthScale = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + depthUniforms.depthOffset = Floats::f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + depthUniforms.depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + + // -------- Pipeline -------- + Metal::DrawPipelineHash pipelineHash{colorRenderTarget->format, DepthFmt::Unknown1}; + if (depthStencilRenderTarget) { + pipelineHash.depthFmt = depthStencilRenderTarget->format; + } + pipelineHash.fragHash.lightingEnabled = regs[0x008F] & 1; + pipelineHash.fragHash.lightingNumLights = regs[0x01C2] & 0x7; + pipelineHash.fragHash.lightingConfig1 = regs[0x01C4u]; + pipelineHash.fragHash.alphaControl = regs[0x104]; + + // Blending and logic op + pipelineHash.blendEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0; + pipelineHash.colorWriteMask = colorMask; + + u8 logicOp = 3; // Copy + if (pipelineHash.blendEnabled) { + pipelineHash.blendControl = regs[PICA::InternalRegs::BlendFunc]; + } else { + logicOp = Helpers::getBits<0, 4>(regs[PICA::InternalRegs::LogicOp]); + } + + MTL::RenderPipelineState* pipeline = drawPipelineCache.get(pipelineHash); + + // Depth stencil state + MTL::DepthStencilState* depthStencilState = depthStencilCache.get(depthStencilHash); + + // -------- Render -------- + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + bool doesClear = clearColor(renderPassDescriptor, colorRenderTarget->texture); + if (depthStencilRenderTarget) { + if (clearDepth(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + if (depthStencilRenderTarget->format == DepthFmt::Depth24Stencil8) { + if (clearStencil(renderPassDescriptor, depthStencilRenderTarget->texture)) doesClear = true; + } + } + + nextRenderPassName = "Draw vertices"; + beginRenderPassIfNeeded( + renderPassDescriptor, doesClear, colorRenderTarget->texture, (depthStencilRenderTarget ? depthStencilRenderTarget->texture : nullptr) + ); + + // Update the LUT texture if necessary + if (gpu.lightingLUTDirty) { + updateLightingLUT(renderCommandEncoder); + } + if (gpu.fogLUTDirty) { + updateFogLUT(renderCommandEncoder); + } + + commandEncoder.setRenderPipelineState(pipeline); + commandEncoder.setDepthStencilState(depthStencilState); + // If size is < 4KB, use inline vertex data, otherwise use a buffer + if (vertices.size_bytes() < 4 * 1024) { + renderCommandEncoder->setVertexBytes(vertices.data(), vertices.size_bytes(), VERTEX_BUFFER_BINDING_INDEX); + } else { + Metal::BufferHandle buffer = vertexBufferCache.get(vertices.data(), vertices.size_bytes()); + renderCommandEncoder->setVertexBuffer(buffer.buffer, buffer.offset, VERTEX_BUFFER_BINDING_INDEX); + } + + // Viewport + const u32 viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff; + const u32 viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff; + const u32 viewportWidth = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f; + const u32 viewportHeight = Floats::f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f; + const auto rect = colorRenderTarget->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]); + MTL::Viewport viewport{double(rect.left + viewportX), double(rect.bottom + viewportY), double(viewportWidth), double(viewportHeight), 0.0, 1.0}; + renderCommandEncoder->setViewport(viewport); + + // Blend color + if (pipelineHash.blendEnabled) { + u32 constantColor = regs[PICA::InternalRegs::BlendColour]; + const u8 r = constantColor & 0xff; + const u8 g = Helpers::getBits<8, 8>(constantColor); + const u8 b = Helpers::getBits<16, 8>(constantColor); + const u8 a = Helpers::getBits<24, 8>(constantColor); + + renderCommandEncoder->setBlendColor(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f); + } + + // Stencil reference + if (stencilEnable) { + const s8 reference = s8(Helpers::getBits<16, 8>(depthStencilHash.stencilConfig)); // Signed reference value + renderCommandEncoder->setStencilReferenceValue(reference); + } + + // Bind resources + setupTextureEnvState(renderCommandEncoder); + bindTexturesToSlots(); + renderCommandEncoder->setVertexBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setFragmentBytes(®s[0x48], (0x200 - 0x48) * sizeof(regs[0]), 0); + renderCommandEncoder->setVertexBytes(&depthUniforms, sizeof(depthUniforms), 2); + renderCommandEncoder->setFragmentBytes(&logicOp, sizeof(logicOp), 2); + u32 lutSlices[2] = {lutLightingTexture->getCurrentIndex(), lutFogTexture->getCurrentIndex()}; + renderCommandEncoder->setFragmentBytes(&lutSlices, sizeof(lutSlices), 3); + + renderCommandEncoder->drawPrimitives(toMTLPrimitiveType(primType), NS::UInteger(0), NS::UInteger(vertices.size())); +} + +void RendererMTL::screenshot(const std::string& name) { + // TODO: implement + Helpers::warn("RendererMTL::screenshot not implemented"); +} + +void RendererMTL::deinitGraphicsContext() { + reset(); + + delete lutLightingTexture; + delete lutFogTexture; + + // copyToLutTexturePipeline->release(); + displayPipeline->release(); + defaultDepthStencilState->release(); + nullTexture->release(); + linearSampler->release(); + nearestSampler->release(); + library->release(); + commandQueue->release(); + device->release(); +} + +std::optional RendererMTL::getColorRenderTarget( + u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound +) { + // Try to find an already existing buffer that contains the provided address + // This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to + // subrect of a surface and in case of texcopy we don't know the format of the surface. + auto buffer = colorRenderTargetCache.findFromAddress(addr); + if (buffer.has_value()) { + return buffer.value().get(); + } + + if (!createIfnotFound) { + return std::nullopt; + } + + // Otherwise create and cache a new buffer. + Metal::ColorRenderTarget sampleBuffer(device, addr, format, width, height); + auto& colorBuffer = colorRenderTargetCache.add(sampleBuffer); + + // Clear the color buffer + colorClearOps[colorBuffer.texture] = {0, 0, 0, 0}; + return colorBuffer; +} + +Metal::DepthStencilRenderTarget& RendererMTL::getDepthRenderTarget() { + Metal::DepthStencilRenderTarget sampleBuffer(device, depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]); + auto buffer = depthStencilRenderTargetCache.find(sampleBuffer); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + auto& depthBuffer = depthStencilRenderTargetCache.add(sampleBuffer); + + // Clear the depth buffer + depthClearOps[depthBuffer.texture] = 0.0f; + if (depthBuffer.format == DepthFmt::Depth24Stencil8) { + stencilClearOps[depthBuffer.texture] = 0; + } + + return depthBuffer; + } +} + +Metal::Texture& RendererMTL::getTexture(Metal::Texture& tex) { + auto buffer = textureCache.find(tex); + + if (buffer.has_value()) { + return buffer.value().get(); + } else { + const auto textureData = std::span{gpu.getPointerPhys(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory + Metal::Texture& newTex = textureCache.add(tex); + newTex.decodeTexture(textureData); + + return newTex; + } +} + +void RendererMTL::setupTextureEnvState(MTL::RenderCommandEncoder* encoder) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + struct { + u32 textureEnvSourceRegs[6]; + u32 textureEnvOperandRegs[6]; + u32 textureEnvCombinerRegs[6]; + u32 textureEnvScaleRegs[6]; + } envState; + u32 textureEnvColourRegs[6]; + + for (int i = 0; i < 6; i++) { + const u32 ioBase = ioBases[i]; + + envState.textureEnvSourceRegs[i] = regs[ioBase]; + envState.textureEnvOperandRegs[i] = regs[ioBase + 1]; + envState.textureEnvCombinerRegs[i] = regs[ioBase + 2]; + textureEnvColourRegs[i] = regs[ioBase + 3]; + envState.textureEnvScaleRegs[i] = regs[ioBase + 4]; + } + + encoder->setVertexBytes(&textureEnvColourRegs, sizeof(textureEnvColourRegs), 1); + encoder->setFragmentBytes(&envState, sizeof(envState), 1); +} + +void RendererMTL::bindTexturesToSlots() { + static constexpr std::array ioBases = { + PICA::InternalRegs::Tex0BorderColor, + PICA::InternalRegs::Tex1BorderColor, + PICA::InternalRegs::Tex2BorderColor, + }; + + for (int i = 0; i < 3; i++) { + if ((regs[PICA::InternalRegs::TexUnitCfg] & (1 << i)) == 0) { + commandEncoder.setFragmentTexture(nullTexture, i); + commandEncoder.setFragmentSamplerState(nearestSampler, i); + continue; + } + + const size_t ioBase = ioBases[i]; + + const u32 dim = regs[ioBase + 1]; + const u32 config = regs[ioBase + 2]; + const u32 height = dim & 0x7ff; + const u32 width = Helpers::getBits<16, 11>(dim); + const u32 addr = (regs[ioBase + 4] & 0x0FFFFFFF) << 3; + u32 format = regs[ioBase + (i == 0 ? 13 : 5)] & 0xF; + + if (addr != 0) [[likely]] { + Metal::Texture targetTex(device, addr, static_cast(format), width, height, config); + auto tex = getTexture(targetTex); + commandEncoder.setFragmentTexture(tex.texture, i); + commandEncoder.setFragmentSamplerState(tex.sampler ? tex.sampler : nearestSampler, i); + } else { + // TODO: Bind a blank texture here. Some games, like Pokemon X, will render with a texture bound to nullptr, triggering GPU open bus + // Binding a blank texture makes all of those games look normal + } + } +} + +void RendererMTL::updateLightingLUT(MTL::RenderCommandEncoder* encoder) { + gpu.lightingLUTDirty = false; + + std::array lightingLut; + + for (int i = 0; i < gpu.lightingLUT.size(); i++) { + uint64_t value = gpu.lightingLUT[i] & 0xFFF; + lightingLut[i] = (value << 4); + } + + u32 index = lutLightingTexture->getNextIndex(); + lutLightingTexture->getTexture()->replaceRegion( + MTL::Region(0, 0, LIGHTING_LUT_TEXTURE_WIDTH, Lights::LUT_Count), 0, index, lightingLut.data(), LIGHTING_LUT_TEXTURE_WIDTH * 2, 0 + ); +} + +void RendererMTL::updateFogLUT(MTL::RenderCommandEncoder* encoder) { + gpu.fogLUTDirty = false; + + std::array fogLut = {0.0f}; + + for (int i = 0; i < fogLut.size(); i += 2) { + const uint32_t value = gpu.fogLUT[i >> 1]; + int32_t diff = value & 0x1fff; + diff = (diff << 19) >> 19; // Sign extend the 13-bit value to 32 bits + const float fogDifference = float(diff) / 2048.0f; + const float fogValue = float((value >> 13) & 0x7ff) / 2048.0f; + + fogLut[i] = fogValue; + fogLut[i + 1] = fogDifference; + } + + u32 index = lutFogTexture->getNextIndex(); + lutFogTexture->getTexture()->replaceRegion(MTL::Region(0, 0, FOG_LUT_TEXTURE_WIDTH, 1), 0, index, fogLut.data(), 0, 0); +} + +void RendererMTL::textureCopyImpl( + Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect& srcRect, + const Math::Rect& destRect +) { + nextRenderPassName = "Texture copy"; + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); + // TODO: clearColor sets the load action to load if it didn't find any clear, but that is unnecessary if we are doing a copy to the whole + // texture + bool doesClear = clearColor(renderPassDescriptor, destFramebuffer.texture); + beginRenderPassIfNeeded(renderPassDescriptor, doesClear, destFramebuffer.texture); + + // Pipeline + Metal::BlitPipelineHash hash{destFramebuffer.format, DepthFmt::Unknown1}; + auto blitPipeline = blitPipelineCache.get(hash); + + commandEncoder.setRenderPipelineState(blitPipeline); + + // Viewport + renderCommandEncoder->setViewport(MTL::Viewport{ + double(destRect.left), double(destRect.bottom), double(destRect.right - destRect.left), double(destRect.top - destRect.bottom), 0.0, 1.0}); + + float srcRectNDC[4] = { + srcRect.left / (float)srcFramebuffer.size.u(), + srcRect.bottom / (float)srcFramebuffer.size.v(), + (srcRect.right - srcRect.left) / (float)srcFramebuffer.size.u(), + (srcRect.top - srcRect.bottom) / (float)srcFramebuffer.size.v(), + }; + + // Bind resources + renderCommandEncoder->setVertexBytes(&srcRectNDC, sizeof(srcRectNDC), 0); + renderCommandEncoder->setFragmentTexture(srcFramebuffer.texture, GET_HELPER_TEXTURE_BINDING(0)); + renderCommandEncoder->setFragmentSamplerState(nearestSampler, GET_HELPER_SAMPLER_STATE_BINDING(0)); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} + +void RendererMTL::beginRenderPassIfNeeded( + MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture +) { + createCommandBufferIfNeeded(); + + if (doesClears || !renderCommandEncoder || colorTexture != lastColorTexture || + (depthTexture != lastDepthTexture && !(lastDepthTexture && !depthTexture))) { + endRenderPass(); + + renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); + renderCommandEncoder->setLabel(toNSString(nextRenderPassName)); + commandEncoder.newRenderCommandEncoder(renderCommandEncoder); + + // Bind persistent resources + + // LUT texture + renderCommandEncoder->setFragmentTexture(lutLightingTexture->getTexture(), 3); + renderCommandEncoder->setFragmentTexture(lutFogTexture->getTexture(), 4); + renderCommandEncoder->setFragmentSamplerState(linearSampler, 3); + + lastColorTexture = colorTexture; + lastDepthTexture = depthTexture; + } + + renderPassDescriptor->release(); +} diff --git a/src/core/services/cam.cpp b/src/core/services/cam.cpp index b3dfd1dc..d9c005e7 100644 --- a/src/core/services/cam.cpp +++ b/src/core/services/cam.cpp @@ -343,7 +343,7 @@ void CAMService::startCapture(u32 messagePointer) { if (port.isValid()) { for (int i : port.getPortIndices()) { - auto& event = ports[port.getSingleIndex()].receiveEvent; + auto& event = ports[i].receiveEvent; // Until we properly implement cameras, immediately signal the receive event if (event.has_value()) { diff --git a/src/core/services/fonts.cpp b/src/core/services/fonts.cpp new file mode 100644 index 00000000..ec4652ee --- /dev/null +++ b/src/core/services/fonts.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.cpp + +#include "services/fonts.hpp" + +#include + +namespace HLE::Fonts { + void relocateSharedFont(u8* sharedFont, u32 newAddress) { + constexpr u32 sharedFontStartOffset = 0x80; + const u8* cfntData = &sharedFont[sharedFontStartOffset]; + + CFNT cfnt; + std::memcpy(&cfnt, cfntData, sizeof(cfnt)); + + u32 assumedCmapOffset = 0; + u32 assumedCwdhOffset = 0; + u32 assumedTglpOffset = 0; + u32 firstCmapOffset = 0; + u32 firstCwdhOffset = 0; + u32 firstTglpOffset = 0; + + // First discover the location of sections so that the rebase offset can be auto-detected + u32 currentOffset = sharedFontStartOffset + cfnt.headerSize; + for (uint block = 0; block < cfnt.numBlocks; ++block) { + const u8* data = &sharedFont[currentOffset]; + + SectionHeader sectionHeader; + std::memcpy(§ionHeader, data, sizeof(sectionHeader)); + + if (firstCmapOffset == 0 && std::memcmp(sectionHeader.magic, "CMAP", 4) == 0) { + firstCmapOffset = currentOffset; + } else if (firstCwdhOffset == 0 && std::memcmp(sectionHeader.magic, "CWDH", 4) == 0) { + firstCwdhOffset = currentOffset; + } else if (firstTglpOffset == 0 && std::memcmp(sectionHeader.magic, "TGLP", 4) == 0) { + firstTglpOffset = currentOffset; + } else if (std::memcmp(sectionHeader.magic, "FINF", 4) == 0) { + Fonts::FINF finf; + std::memcpy(&finf, data, sizeof(finf)); + + assumedCmapOffset = finf.cmapOffset - sizeof(SectionHeader); + assumedCwdhOffset = finf.cwdhOffset - sizeof(SectionHeader); + assumedTglpOffset = finf.tglpOffset - sizeof(SectionHeader); + } + + currentOffset += sectionHeader.sectionSize; + } + + u32 previousBase = assumedCmapOffset - firstCmapOffset; + if ((previousBase != assumedCwdhOffset - firstCwdhOffset) || (previousBase != assumedTglpOffset - firstTglpOffset)) { + Helpers::warn("You shouldn't be seeing this. Shared Font file offsets might be borked?"); + } + + u32 offset = newAddress - previousBase; + + // Reset pointer back to start of sections and do the actual rebase + currentOffset = sharedFontStartOffset + cfnt.headerSize; + for (uint block = 0; block < cfnt.numBlocks; ++block) { + u8* data = &sharedFont[currentOffset]; + + SectionHeader sectionHeader; + std::memcpy(§ionHeader, data, sizeof(sectionHeader)); + + if (std::memcmp(sectionHeader.magic, "FINF", 4) == 0) { + Fonts::FINF finf; + std::memcpy(&finf, data, sizeof(finf)); + + // Relocate the offsets in the FINF section + finf.cmapOffset += offset; + finf.cwdhOffset += offset; + finf.tglpOffset += offset; + + std::memcpy(data, &finf, sizeof(finf)); + } else if (std::memcmp(sectionHeader.magic, "CMAP", 4) == 0) { + Fonts::CMAP cmap; + std::memcpy(&cmap, data, sizeof(cmap)); + + // Relocate the offsets in the CMAP section + if (cmap.nextCmapOffset != 0) { + cmap.nextCmapOffset += offset; + } + + std::memcpy(data, &cmap, sizeof(cmap)); + } else if (std::memcmp(sectionHeader.magic, "CWDH", 4) == 0) { + Fonts::CWDH cwdh; + std::memcpy(&cwdh, data, sizeof(cwdh)); + + // Relocate the offsets in the CWDH section + if (cwdh.nextCwdhOffset != 0) { + cwdh.nextCwdhOffset += offset; + } + + std::memcpy(data, &cwdh, sizeof(cwdh)); + } else if (std::memcmp(sectionHeader.magic, "TGLP", 4) == 0) { + Fonts::TGLP tglp; + std::memcpy(&tglp, data, sizeof(tglp)); + + // Relocate the offsets in the TGLP section + tglp.sheetDataOffset += offset; + std::memcpy(data, &tglp, sizeof(tglp)); + } + + currentOffset += sectionHeader.sectionSize; + } + } +} // namespace HLE::Fonts diff --git a/src/core/services/fonts/CitraSharedFontUSRelocated.bin b/src/core/services/fonts/SharedFontReplacement.bin similarity index 100% rename from src/core/services/fonts/CitraSharedFontUSRelocated.bin rename to src/core/services/fonts/SharedFontReplacement.bin diff --git a/src/core/services/fs.cpp b/src/core/services/fs.cpp index 2e102958..e81db6cd 100644 --- a/src/core/services/fs.cpp +++ b/src/core/services/fs.cpp @@ -105,7 +105,7 @@ ArchiveBase* FSService::getArchiveFromID(u32 id, const FSPath& archivePath) { } } -std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const FilePerms& perms) { +std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPath& path, const FSPath& archivePath, const FilePerms& perms) { FileDescriptor opened = archive->openFile(path, perms); if (opened.has_value()) { // If opened doesn't have a value, we failed to open the file auto handle = kernel.makeObject(KernelObjectType::File); @@ -119,7 +119,7 @@ std::optional FSService::openFileHandle(ArchiveBase* archive, const FSPa } } -Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { +Rust::Result FSService::openDirectoryHandle(ArchiveBase* archive, const FSPath& path) { Rust::Result opened = archive->openDirectory(path); if (opened.isOk()) { // If opened doesn't have a value, we failed to open the directory auto handle = kernel.makeObject(KernelObjectType::Directory); @@ -132,7 +132,7 @@ Rust::Result FSService::openDirectoryHandle(Archi } } -Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { +Rust::Result FSService::openArchiveHandle(u32 archiveID, const FSPath& path) { ArchiveBase* archive = getArchiveFromID(archiveID, path); if (archive == nullptr) [[unlikely]] { diff --git a/src/core/services/hid.cpp b/src/core/services/hid.cpp index ef6cbb41..a7b9b13b 100644 --- a/src/core/services/hid.cpp +++ b/src/core/services/hid.cpp @@ -35,6 +35,7 @@ void HIDService::reset() { circlePadX = circlePadY = 0; touchScreenX = touchScreenY = 0; roll = pitch = yaw = 0; + accelX = accelY = accelZ = 0; } void HIDService::handleSyncRequest(u32 messagePointer) { @@ -103,7 +104,6 @@ void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) { void HIDService::getGyroscopeCoefficient(u32 messagePointer) { log("HID::GetGyroscopeLowRawToDpsCoefficient\n"); - constexpr float gyroscopeCoeff = 14.375f; // Same as retail 3DS mem.write32(messagePointer, IPC::responseHeader(0x15, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, Helpers::bit_cast(gyroscopeCoeff)); @@ -190,6 +190,20 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x108, currentTick); // Write new tick count } writeSharedMem(0x118, nextAccelerometerIndex); // Index last updated by the HID module + const size_t accelEntryOffset = 0x128 + (nextAccelerometerIndex * 6); // Offset in the array of 8 accelerometer entries + + // Raw data of current accelerometer entry + // TODO: How is the "raw" data actually calculated? + s16* accelerometerDataRaw = getSharedMemPointer(0x120); + accelerometerDataRaw[0] = accelX; + accelerometerDataRaw[1] = accelY; + accelerometerDataRaw[2] = accelZ; + + // Accelerometer entry in entry table + s16* accelerometerData = getSharedMemPointer(accelEntryOffset); + accelerometerData[0] = accelX; + accelerometerData[1] = accelY; + accelerometerData[2] = accelZ; nextAccelerometerIndex = (nextAccelerometerIndex + 1) % 8; // Move to next entry // Next, update gyro state @@ -198,9 +212,10 @@ void HIDService::updateInputs(u64 currentTick) { writeSharedMem(0x158, currentTick); // Write new tick count } const size_t gyroEntryOffset = 0x178 + (nextGyroIndex * 6); // Offset in the array of 8 touchscreen entries - writeSharedMem(gyroEntryOffset, pitch); - writeSharedMem(gyroEntryOffset + 2, yaw); - writeSharedMem(gyroEntryOffset + 4, roll); + s16* gyroData = getSharedMemPointer(gyroEntryOffset); + gyroData[0] = pitch; + gyroData[1] = yaw; + gyroData[2] = roll; // Since gyroscope euler angles are relative, we zero them out here and the frontend will update them again when we receive a new rotation roll = pitch = yaw = 0; diff --git a/src/core/services/service_manager.cpp b/src/core/services/service_manager.cpp index 2a95b5c9..31e3d702 100644 --- a/src/core/services/service_manager.cpp +++ b/src/core/services/service_manager.cpp @@ -93,7 +93,7 @@ void ServiceManager::registerClient(u32 messagePointer) { } // clang-format off -static std::map serviceMap = { +static std::map serviceMap = { { "ac:u", KernelHandles::AC }, { "act:a", KernelHandles::ACT }, { "act:u", KernelHandles::ACT }, diff --git a/src/core/services/y2r.cpp b/src/core/services/y2r.cpp index a796631c..1c7b33cd 100644 --- a/src/core/services/y2r.cpp +++ b/src/core/services/y2r.cpp @@ -61,6 +61,7 @@ void Y2RService::reset() { inputLineWidth = 420; conversionCoefficients.fill(0); + isBusy = false; } void Y2RService::handleSyncRequest(u32 messagePointer) { @@ -156,6 +157,11 @@ void Y2RService::setTransferEndInterrupt(u32 messagePointer) { void Y2RService::stopConversion(u32 messagePointer) { log("Y2R::StopConversion\n"); + if (isBusy) { + isBusy = false; + kernel.getScheduler().removeEvent(Scheduler::EventType::SignalY2R); + } + mem.write32(messagePointer, IPC::responseHeader(0x27, 1, 0)); mem.write32(messagePointer + 4, Result::Success); } @@ -167,7 +173,7 @@ void Y2RService::isBusyConversion(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x28, 2, 0)); mem.write32(messagePointer + 4, Result::Success); - mem.write32(messagePointer + 8, static_cast(BusyStatus::NotBusy)); + mem.write32(messagePointer + 8, static_cast(isBusy ? BusyStatus::Busy : BusyStatus::NotBusy)); } void Y2RService::setBlockAlignment(u32 messagePointer) { @@ -434,11 +440,15 @@ void Y2RService::startConversion(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x26, 1, 0)); mem.write32(messagePointer + 4, Result::Success); - // Make Y2R conversion end instantly. - // Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt? - if (transferEndEvent.has_value()) { - kernel.signalEvent(transferEndEvent.value()); - } + // Schedule Y2R conversion end event. + // The tick value is tweaked based on the minimum delay needed to get FIFA 15 to not hang due to a race condition on its title screen + static constexpr u64 delayTicks = 1'350'000; + isBusy = true; + + // Remove any potential pending Y2R event and schedule a new one + Scheduler& scheduler = kernel.getScheduler(); + scheduler.removeEvent(Scheduler::EventType::SignalY2R); + scheduler.addEvent(Scheduler::EventType::SignalY2R, scheduler.currentTimestamp + delayTicks); } void Y2RService::isFinishedSendingYUV(u32 messagePointer) { @@ -484,4 +494,15 @@ void Y2RService::isFinishedReceiving(u32 messagePointer) { mem.write32(messagePointer, IPC::responseHeader(0x17, 2, 0)); mem.write32(messagePointer + 4, Result::Success); mem.write32(messagePointer + 8, finished ? 1 : 0); +} + +void Y2RService::signalConversionDone() { + if (isBusy) { + isBusy = false; + + // Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt? + if (transferEndEvent.has_value()) { + kernel.signalEvent(transferEndEvent.value()); + } + } } \ No newline at end of file diff --git a/src/emulator.cpp b/src/emulator.cpp index 7e891b56..5a74bf05 100644 --- a/src/emulator.cpp +++ b/src/emulator.cpp @@ -1,11 +1,13 @@ #include "emulator.hpp" -#ifndef __ANDROID__ +#if !defined(__ANDROID__) && !defined(__LIBRETRO__) #include #endif #include +#include "renderdoc.hpp" + #ifdef _WIN32 #include @@ -32,6 +34,10 @@ Emulator::Emulator() audioDevice.init(dsp->getSamples()); setAudioEnabled(config.audioEnabled); + if (Renderdoc::isSupported() && config.enableRenderdoc) { + loadRenderdoc(); + } + #ifdef PANDA3DS_ENABLE_DISCORD_RPC if (config.discordRpcEnabled) { discordRpc.init(); @@ -84,6 +90,7 @@ void Emulator::reset(ReloadOption reload) { } } +#ifndef __LIBRETRO__ std::filesystem::path Emulator::getAndroidAppPath() { // SDL_GetPrefPath fails to get the path due to no JNI environment std::ifstream cmdline("/proc/self/cmdline"); @@ -100,6 +107,7 @@ std::filesystem::path Emulator::getConfigPath() { return std::filesystem::current_path() / "config.toml"; } } +#endif void Emulator::step() {} void Emulator::render() {} @@ -165,10 +173,12 @@ void Emulator::pollScheduler() { case Scheduler::EventType::UpdateTimers: kernel.pollTimers(); break; case Scheduler::EventType::RunDSP: { - dsp->runAudioFrame(); + dsp->runAudioFrame(time); break; } + case Scheduler::EventType::SignalY2R: kernel.getServiceManager().getY2R().signalConversionDone(); break; + default: { Helpers::panic("Scheduler: Unimplemented event type received: %d\n", static_cast(eventType)); break; @@ -177,6 +187,7 @@ void Emulator::pollScheduler() { } } +#ifndef __LIBRETRO__ // Get path for saving files (AppData on Windows, /home/user/.local/share/ApplicationName on Linux, etc) // Inside that path, we be use a game-specific folder as well. Eg if we were loading a ROM called PenguinDemo.3ds, the savedata would be in // %APPDATA%/Alber/PenguinDemo/SaveData on Windows, and so on. We do this because games save data in their own filesystem on the cart. @@ -200,6 +211,7 @@ std::filesystem::path Emulator::getAppDataRoot() { return appDataPath; } +#endif bool Emulator::loadROM(const std::filesystem::path& path) { // Reset the emulator if we've already loaded a ROM @@ -214,6 +226,8 @@ bool Emulator::loadROM(const std::filesystem::path& path) { const std::filesystem::path appDataPath = getAppDataRoot(); const std::filesystem::path dataPath = appDataPath / path.filename().stem(); const std::filesystem::path aesKeysPath = appDataPath / "sysdata" / "aes_keys.txt"; + const std::filesystem::path seedDBPath = appDataPath / "sysdata" / "seeddb.bin"; + IOFile::setAppDataDir(dataPath); // Open the text file containing our AES keys if it exists. We use the std::filesystem::exists overload that takes an error code param to @@ -223,6 +237,10 @@ bool Emulator::loadROM(const std::filesystem::path& path) { aesEngine.loadKeys(aesKeysPath); } + if (std::filesystem::exists(seedDBPath, ec) && !ec) { + aesEngine.setSeedPath(seedDBPath); + } + kernel.initializeFS(); auto extension = path.extension(); bool success; // Tracks if we loaded the ROM successfully @@ -293,6 +311,11 @@ bool Emulator::load3DSX(const std::filesystem::path& path) { } bool Emulator::loadELF(const std::filesystem::path& path) { + // We can't open a new file with this ifstream if it's associated with a file + if (loadedELF.is_open()) { + loadedELF.close(); + } + loadedELF.open(path, std::ios_base::binary); // Open ROM in binary mode romType = ROMType::ELF; @@ -414,3 +437,9 @@ void Emulator::setAudioEnabled(bool enable) { dsp->setAudioEnabled(enable); } + +void Emulator::loadRenderdoc() { + std::string capturePath = (std::filesystem::current_path() / "RenderdocCaptures").generic_string(); + Renderdoc::loadRenderdoc(); + Renderdoc::setOutputDir(capturePath, ""); +} \ No newline at end of file diff --git a/src/host_shaders/metal_blit.metal b/src/host_shaders/metal_blit.metal new file mode 100644 index 00000000..31b94ec4 --- /dev/null +++ b/src/host_shaders/metal_blit.metal @@ -0,0 +1,29 @@ +#include +using namespace metal; + +#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding) +#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding) + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +struct NDCViewport { + float2 offset; + float2 scale; +}; + +vertex BasicVertexOut vertexBlit(uint vid [[vertex_id]], constant NDCViewport& viewport [[buffer(0)]]) { + BasicVertexOut out; + out.uv = float2((vid << 1) & 2, vid & 2); + out.position = float4(out.uv * 2.0 - 1.0, 0.0, 1.0); + out.position.y = -out.position.y; + out.uv = out.uv * viewport.scale + viewport.offset; + + return out; +} + +fragment float4 fragmentBlit(BasicVertexOut in [[stage_in]], texture2d tex [[texture(GET_HELPER_TEXTURE_BINDING(0))]], sampler samplr [[sampler(GET_HELPER_SAMPLER_STATE_BINDING(0))]]) { + return tex.sample(samplr, in.uv); +} diff --git a/src/host_shaders/metal_copy_to_lut_texture.metal b/src/host_shaders/metal_copy_to_lut_texture.metal new file mode 100644 index 00000000..c21246f1 --- /dev/null +++ b/src/host_shaders/metal_copy_to_lut_texture.metal @@ -0,0 +1,9 @@ +#include +using namespace metal; + +constant ushort lutTextureWidth [[function_constant(0)]]; + +// The copy is done in a vertex shader instead of a compute kernel, since dispatching compute would require ending the render pass +vertex void vertexCopyToLutTexture(uint vid [[vertex_id]], texture2d out [[texture(0)]], device float2* data [[buffer(0)]], constant uint& arrayOffset [[buffer(1)]]) { + out.write(float4(data[vid], 0.0, 0.0), uint2(vid % lutTextureWidth, arrayOffset + vid / lutTextureWidth)); +} diff --git a/src/host_shaders/metal_shaders.metal b/src/host_shaders/metal_shaders.metal new file mode 100644 index 00000000..18c310f7 --- /dev/null +++ b/src/host_shaders/metal_shaders.metal @@ -0,0 +1,759 @@ +#include +using namespace metal; + +struct BasicVertexOut { + float4 position [[position]]; + float2 uv; +}; + +constant float4 displayPositions[4] = { + float4(-1.0, -1.0, 0.0, 1.0), + float4( 1.0, -1.0, 0.0, 1.0), + float4(-1.0, 1.0, 0.0, 1.0), + float4( 1.0, 1.0, 0.0, 1.0) +}; + +constant float2 displayTexCoord[4] = { + float2(0.0, 1.0), + float2(0.0, 0.0), + float2(1.0, 1.0), + float2(1.0, 0.0) +}; + +vertex BasicVertexOut vertexDisplay(uint vid [[vertex_id]]) { + BasicVertexOut out; + out.position = displayPositions[vid]; + out.uv = displayTexCoord[vid]; + + return out; +} + +fragment float4 fragmentDisplay(BasicVertexOut in [[stage_in]], texture2d tex [[texture(0)]], sampler samplr [[sampler(0)]]) { + return tex.sample(samplr, in.uv); +} + +struct PicaRegs { + uint regs[0x200 - 0x48]; + + uint read(uint reg) constant { + return regs[reg - 0x48]; + } +}; + +struct VertTEV { + uint textureEnvColor[6]; +}; + +float4 abgr8888ToFloat4(uint abgr) { + const float scale = 1.0 / 255.0; + + return scale * float4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); +} + +struct DrawVertexIn { + float4 position [[attribute(0)]]; + float4 quaternion [[attribute(1)]]; + float4 color [[attribute(2)]]; + float2 texCoord0 [[attribute(3)]]; + float2 texCoord1 [[attribute(4)]]; + float texCoord0W [[attribute(5)]]; + float3 view [[attribute(6)]]; + float2 texCoord2 [[attribute(7)]]; +}; + +// Metal cannot return arrays from vertex functions, this is an ugly workaround +struct EnvColor { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; + + thread float4& operator[](int i) { + switch (i) { + case 0: return c0; + case 1: return c1; + case 2: return c2; + case 3: return c3; + case 4: return c4; + case 5: return c5; + default: return c0; + } + } +}; + +float3 rotateFloat3ByQuaternion(float3 v, float4 q) { + float3 u = q.xyz; + float s = q.w; + + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0u) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return as_type(hex); +} + +struct DepthUniforms { + float depthScale; + float depthOffset; + bool depthMapEnable; +}; + +struct DrawVertexOut { + float4 position [[position]]; + float4 quaternion; + float4 color; + float3 texCoord0; + float2 texCoord1; + float2 texCoord2; + float3 view; + float3 normal; + float3 tangent; + float3 bitangent; + EnvColor textureEnvColor [[flat]]; + float4 textureEnvBufferColor [[flat]]; +}; + +struct DrawVertexOutWithClip { + DrawVertexOut out; + float clipDistance [[clip_distance]] [2]; +}; + +// TODO: check this +float transformZ(float z, float w, constant DepthUniforms& depthUniforms) { + z = z / w * depthUniforms.depthScale + depthUniforms.depthOffset; + if (!depthUniforms.depthMapEnable) { + z *= w; + } + + return z * w; +} + +vertex DrawVertexOutWithClip vertexDraw(DrawVertexIn in [[stage_in]], constant PicaRegs& picaRegs [[buffer(0)]], constant VertTEV& tev [[buffer(1)]], constant DepthUniforms& depthUniforms [[buffer(2)]]) { + DrawVertexOut out; + + // Position + out.position = in.position; + // Flip the y position + out.position.y = -out.position.y; + + // Apply depth uniforms + out.position.z = transformZ(out.position.z, out.position.w, depthUniforms); + + // Color + out.color = min(abs(in.color), 1.0); + + // Texture coordinates + out.texCoord0 = float3(in.texCoord0, in.texCoord0W); + out.texCoord0.y = 1.0 - out.texCoord0.y; + out.texCoord1 = in.texCoord1; + out.texCoord1.y = 1.0 - out.texCoord1.y; + out.texCoord2 = in.texCoord2; + out.texCoord2.y = 1.0 - out.texCoord2.y; + + // View + out.view = in.view; + + // TBN + out.normal = normalize(rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion)); + out.tangent = normalize(rotateFloat3ByQuaternion(float3(1.0, 0.0, 0.0), in.quaternion)); + out.bitangent = normalize(rotateFloat3ByQuaternion(float3(0.0, 1.0, 0.0), in.quaternion)); + out.quaternion = in.quaternion; + + // Environment + for (int i = 0; i < 6; i++) { + out.textureEnvColor[i] = abgr8888ToFloat4(tev.textureEnvColor[i]); + } + + out.textureEnvBufferColor = abgr8888ToFloat4(picaRegs.read(0xFDu)); + + DrawVertexOutWithClip outWithClip; + outWithClip.out = out; + + // Parse clipping plane registers + float4 clipData = float4( + decodeFP(picaRegs.read(0x48u) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x49u) & 0xffffffu, 7u, 16u), + decodeFP(picaRegs.read(0x4Au) & 0xffffffu, 7u, 16u), decodeFP(picaRegs.read(0x4Bu) & 0xffffffu, 7u, 16u) + ); + + // There's also another, always-on clipping plane based on vertex z + // TODO: transform + outWithClip.clipDistance[0] = -in.position.z; + outWithClip.clipDistance[1] = dot(clipData, in.position); + + return outWithClip; +} + +constant bool lightingEnabled [[function_constant(0)]]; +constant uint8_t lightingNumLights [[function_constant(1)]]; +constant uint32_t lightingConfig1 [[function_constant(2)]]; +constant uint16_t alphaControl [[function_constant(3)]]; + +struct Globals { + bool error_unimpl; + + float4 tevSources[16]; + float4 tevNextPreviousBuffer; + bool tevUnimplementedSourceFlag = false; + + uint GPUREG_LIGHTING_LUTINPUT_SCALE; + uint GPUREG_LIGHTING_LUTINPUT_ABS; + uint GPUREG_LIGHTING_LUTINPUT_SELECT; + uint GPUREG_LIGHTi_CONFIG; + + // HACK + //bool lightingEnabled; + //uint8_t lightingNumLights; + //uint32_t lightingConfig1; + //uint16_t alphaControl; + + float3 normal; +}; + +// See docs/lighting.md +constant uint samplerEnabledBitfields[2] = {0x7170e645u, 0x7f013fefu}; + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} + +struct FragTEV { + uint textureEnvSource[6]; + uint textureEnvOperand[6]; + uint textureEnvCombiner[6]; + uint textureEnvScale[6]; + + float4 fetchSource(thread Globals& globals, uint src_id) constant { + if (src_id >= 6u && src_id < 13u) { + globals.tevUnimplementedSourceFlag = true; + } + + return globals.tevSources[src_id]; + } + + float4 getColorAndAlphaSource(thread Globals& globals, int tev_id, int src_id) constant { + float4 result; + + float4 colorSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4)) & 15u); + float4 alphaSource = fetchSource(globals, (textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); + + uint colorOperand = (textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; + uint alphaOperand = (textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; + + // TODO: figure out what the undocumented values do + switch (colorOperand) { + case 0u: result.rgb = colorSource.rgb; break; // Source color + case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color + case 2u: result.rgb = float3(colorSource.a); break; // Source alpha + case 3u: result.rgb = float3(1.0 - colorSource.a); break; // One minus source alpha + case 4u: result.rgb = float3(colorSource.r); break; // Source red + case 5u: result.rgb = float3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = float3(colorSource.g); break; // Source green + case 9u: result.rgb = float3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = float3(colorSource.b); break; // Source blue + case 13u: result.rgb = float3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; + } + + float4 calculateCombiner(thread Globals& globals, int tev_id) constant { + float4 source0 = getColorAndAlphaSource(globals, tev_id, 0); + float4 source1 = getColorAndAlphaSource(globals, tev_id, 1); + float4 source2 = getColorAndAlphaSource(globals, tev_id, 2); + + uint colorCombine = textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (textureEnvCombiner[tev_id] >> 16) & 15u; + + float4 result = float4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(float3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = float3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = float4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb), 1.0) * source2.rgb; break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. + switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(source0.a * source1.a + source2.a, 1.0); break; // Multiply then add + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((textureEnvScale[tev_id] >> 16) & 3u)); + + return result; + } +}; + +enum class LogicOp : uint8_t { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15 +}; + +uint4 performLogicOpU(LogicOp logicOp, uint4 s, uint4 d) { + switch (logicOp) { + case LogicOp::Clear: return as_type(float4(0.0)); + case LogicOp::And: return s & d; + case LogicOp::AndReverse: return s & ~d; + case LogicOp::Copy: return s; + case LogicOp::Set: return as_type(float4(1.0)); + case LogicOp::CopyInverted: return ~s; + case LogicOp::NoOp: return d; + case LogicOp::Invert: return ~d; + case LogicOp::Nand: return ~(s & d); + case LogicOp::Or: return s | d; + case LogicOp::Nor: return ~(s | d); + case LogicOp::Xor: return s ^ d; + case LogicOp::Equiv: return ~(s ^ d); + case LogicOp::AndInverted: return ~s & d; + case LogicOp::OrReverse: return s | ~d; + case LogicOp::OrInverted: return ~s | d; + } +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +float lutLookup(texture2d_array texLut, uint slice, uint lut, uint index) { + return texLut.read(uint2(index, lut), slice).r; +} + +float lightLutLookup(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, uint environment_id, uint lut_id, uint light_id, float3 light_vector, float3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + globals.error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (extract_bits(lightingConfig1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(globals.normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(in.view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(globals.normal, normalize(in.view)); + break; + } + case 3u: { + delta = dot(light_vector, globals.normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(picaRegs.read(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(picaRegs.read(0x0147u + (light_id << 4u))); + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = extract_bits(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = extract_bits(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + float3 spotlight_vector = float3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + globals.error_unimpl = true; + break; + } + default: { + delta = 1.0; + globals.error_unimpl = true; + break; + } + } + + // 0 = enabled + if (extract_bits(globals.GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(texLut, slice, lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(texLut, slice, lut_index, index) * scale; + } +} + +float3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * float3(float(extract_bits(reg, 20, 8)), float(extract_bits(reg, 10, 8)), float(extract_bits(reg, 00, 8))); +} + +// Implements the following algorthm: https://mathb.in/26766 +void calcLighting(thread Globals& globals, thread DrawVertexOut& in, constant PicaRegs& picaRegs, texture2d_array texLut, uint slice, sampler linearSampler, thread float4& primaryColor, thread float4& secondaryColor) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + //float3 normal = normalize(in.normal); + //float3 tangent = normalize(in.tangent); + //float3 bitangent = normalize(in.bitangent); + //float3 view = normalize(in.view); + + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = picaRegs.read(0x01D9u); + + primaryColor = float4(0.0, 0.0, 0.0, 1.0); + secondaryColor = float4(0.0, 0.0, 0.0, 1.0); + + uint GPUREG_LIGHTING_CONFIG0 = picaRegs.read(0x01C3u); + globals.GPUREG_LIGHTING_LUTINPUT_SCALE = picaRegs.read(0x01D2u); + globals.GPUREG_LIGHTING_LUTINPUT_ABS = picaRegs.read(0x01D0u); + globals.GPUREG_LIGHTING_LUTINPUT_SELECT = picaRegs.read(0x01D1u); + + uint bumpMode = extract_bits(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bumpMode) { + default: { + globals.normal = rotateFloat3ByQuaternion(float3(0.0, 0.0, 1.0), in.quaternion); + break; + } + } + + float4 diffuseSum = float4(0.0, 0.0, 0.0, 1.0); + float4 specularSum = float4(0.0, 0.0, 0.0, 1.0); + + uint environmentId = extract_bits(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clampHighlights = extract_bits(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint lightId; + float3 lightVector = float3(0.0); + float3 halfVector = float3(0.0); + + for (uint i = 0u; i < lightingNumLights + 1; i++) { + lightId = extract_bits(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); + + uint GPUREG_LIGHTi_SPECULAR0 = picaRegs.read(0x0140u + (lightId << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = picaRegs.read(0x0141u + (lightId << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = picaRegs.read(0x0142u + (lightId << 4u)); + uint GPUREG_LIGHTi_AMBIENT = picaRegs.read(0x0143u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = picaRegs.read(0x0144u + (lightId << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = picaRegs.read(0x0145u + (lightId << 4u)); + globals.GPUREG_LIGHTi_CONFIG = picaRegs.read(0x0149u + (lightId << 4u)); + + float lightDistance; + float3 lightPosition = float3( + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), + decodeFP(extract_bits(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) + ); + + // Positional Light + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { + // error_unimpl = true; + lightVector = lightPosition + in.view; + } + + // Directional light + else { + lightVector = lightPosition; + } + + lightDistance = length(lightVector); + lightVector = normalize(lightVector); + halfVector = lightVector + normalize(in.view); + + float NdotL = dot(globals.normal, lightVector); // N dot Li + + // Two sided diffuse + if (extract_bits(globals.GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + + float geometricFactor; + bool useGeo0 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool useGeo1 = extract_bits(globals.GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (useGeo0 || useGeo1) { + geometricFactor = dot(halfVector, halfVector); + geometricFactor = geometricFactor == 0.0 ? 0.0 : min(NdotL / geometricFactor, 1.0); + } + + float distanceAttenuation = 1.0; + if (extract_bits(lightingConfig1, 24 + int(lightId), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = extract_bits(picaRegs.read(0x014Au + (lightId << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = extract_bits(picaRegs.read(0x014Bu + (lightId << 4u)), 0, 20); + + float distanceAttenuationBias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distanceAttenuationScale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = lightDistance * distanceAttenuationScale + distanceAttenuationBias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distanceAttenuation = lutLookup(texLut, slice, 16u + lightId, index); + } + + float spotlightAttenuation = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, SP_LUT, lightId, lightVector, halfVector); + float specular0Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D0_LUT, lightId, lightVector, halfVector); + float specular1Distribution = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, D1_LUT, lightId, lightVector, halfVector); + float3 reflectedColor; + reflectedColor.r = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RR_LUT, lightId, lightVector, halfVector); + + if (isSamplerEnabled(environmentId, RG_LUT)) { + reflectedColor.g = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RG_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.g = reflectedColor.r; + } + + if (isSamplerEnabled(environmentId, RB_LUT)) { + reflectedColor.b = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, RB_LUT, lightId, lightVector, halfVector); + } else { + reflectedColor.b = reflectedColor.r; + } + + float3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0Distribution; + float3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1Distribution * reflectedColor; + + specular0 *= useGeo0 ? geometricFactor : 1.0; + specular1 *= useGeo1 ? geometricFactor : 1.0; + + float clampFactor = 1.0; + if (clampHighlights && NdotL == 0.0) { + clampFactor = 0.0; + } + + float lightFactor = distanceAttenuation * spotlightAttenuation; + diffuseSum.rgb += lightFactor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specularSum.rgb += lightFactor * clampFactor * (specular0 + specular1); + } + uint fresnelOutput1 = extract_bits(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnelOutput2 = extract_bits(GPUREG_LIGHTING_CONFIG0, 3, 1); + + float fresnelFactor; + + if (fresnelOutput1 == 1u || fresnelOutput2 == 1u) { + fresnelFactor = lightLutLookup(globals, in, picaRegs, texLut, slice, environmentId, FR_LUT, lightId, lightVector, halfVector); + } + + if (fresnelOutput1 == 1u) { + diffuseSum.a = fresnelFactor; + } + + if (fresnelOutput2 == 1u) { + specularSum.a = fresnelFactor; + } + + uint GPUREG_LIGHTING_AMBIENT = picaRegs.read(0x01C0u); + float4 globalAmbient = float4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primaryColor = clamp(globalAmbient + diffuseSum, 0.0, 1.0); + secondaryColor = clamp(specularSum, 0.0, 1.0); +} + +float4 performLogicOp(LogicOp logicOp, float4 s, float4 d) { + return as_type(performLogicOpU(logicOp, as_type(s), as_type(d))); +} + +fragment float4 fragmentDraw(DrawVertexOut in [[stage_in]], float4 prevColor [[color(0)]], constant PicaRegs& picaRegs [[buffer(0)]], constant FragTEV& tev [[buffer(1)]], constant LogicOp& logicOp [[buffer(2)]], constant uint2& lutSlices [[buffer(3)]], texture2d tex0 [[texture(0)]], texture2d tex1 [[texture(1)]], texture2d tex2 [[texture(2)]], texture2d_array texLightingLut [[texture(3)]], texture1d_array texFogLut [[texture(4)]], sampler samplr0 [[sampler(0)]], sampler samplr1 [[sampler(1)]], sampler samplr2 [[sampler(2)]], sampler linearSampler [[sampler(3)]]) { + Globals globals; + + // HACK + //globals.lightingEnabled = picaRegs.read(0x008Fu) != 0u; + //globals.lightingNumLights = picaRegs.read(0x01C2u); + //globals.lightingConfig1 = picaRegs.read(0x01C4u); + //globals.alphaControl = picaRegs.read(0x104); + + globals.tevSources[0] = in.color; + if (lightingEnabled) { + calcLighting(globals, in, picaRegs, texLightingLut, lutSlices.x, linearSampler, globals.tevSources[1], globals.tevSources[2]); + } else { + globals.tevSources[1] = float4(0.0); + globals.tevSources[2] = float4(0.0); + } + + uint textureConfig = picaRegs.read(0x80u); + float2 texCoord2 = (textureConfig & (1u << 13)) != 0u ? in.texCoord1 : in.texCoord2; + + if ((textureConfig & 1u) != 0u) globals.tevSources[3] = tex0.sample(samplr0, in.texCoord0.xy); + if ((textureConfig & 2u) != 0u) globals.tevSources[4] = tex1.sample(samplr1, in.texCoord1); + if ((textureConfig & 4u) != 0u) globals.tevSources[5] = tex2.sample(samplr2, texCoord2); + globals.tevSources[13] = float4(0.0); // Previous buffer + globals.tevSources[15] = in.color; // Previous combiner + + globals.tevNextPreviousBuffer = in.textureEnvBufferColor; + uint textureEnvUpdateBuffer = picaRegs.read(0xE0u); + + for (int i = 0; i < 6; i++) { + globals.tevSources[14] = in.textureEnvColor[i]; // Constant color + globals.tevSources[15] = tev.calculateCombiner(globals, i); + globals.tevSources[13] = globals.tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + globals.tevNextPreviousBuffer.rgb = globals.tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + globals.tevNextPreviousBuffer.a = globals.tevSources[15].a; + } + } + } + + float4 color = globals.tevSources[15]; + + // Fog + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flipDepth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fogIndex = flipDepth ? 1.0 - in.position.z : in.position.z; + fogIndex *= 128.0; + float clampedIndex = clamp(floor(fogIndex), 0.0, 127.0); + float delta = fogIndex - clampedIndex; + float2 value = texFogLut.read(clampedIndex, lutSlices.y).rg; + float fogFactor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = picaRegs.read(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = (GPUREG_FOG_COLOR & 0xFFu) / 255.0; + float g = ((GPUREG_FOG_COLOR >> 8) & 0xFFu) / 255.0; + float b = ((GPUREG_FOG_COLOR >> 16) & 0xFFu) / 255.0; + float3 fogColor = float3(r, g, b); + + color.rgb = mix(fogColor, color.rgb, fogFactor); + } + + // Perform alpha test + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = color.a; + + switch (func) { + case 0u: discard_fragment(); // Never pass alpha test + case 1u: break; // Always pass alpha test + case 2u: // Pass if equal + if (alpha != reference) discard_fragment(); + break; + case 3u: // Pass if not equal + if (alpha == reference) discard_fragment(); + break; + case 4u: // Pass if less than + if (alpha >= reference) discard_fragment(); + break; + case 5u: // Pass if less than or equal + if (alpha > reference) discard_fragment(); + break; + case 6u: // Pass if greater than + if (alpha <= reference) discard_fragment(); + break; + case 7u: // Pass if greater than or equal + if (alpha < reference) discard_fragment(); + break; + } + } + + return performLogicOp(logicOp, color, prevColor); +} diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag index f6fa6c55..9f07df0b 100644 --- a/src/host_shaders/opengl_fragment_shader.frag +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -1,8 +1,6 @@ #version 410 core -in vec3 v_tangent; -in vec3 v_normal; -in vec3 v_bitangent; +in vec4 v_quaternion; in vec4 v_colour; in vec3 v_texcoord0; in vec2 v_texcoord1; @@ -27,7 +25,7 @@ uniform bool u_depthmapEnable; uniform sampler2D u_tex0; uniform sampler2D u_tex1; uniform sampler2D u_tex2; -uniform sampler1DArray u_tex_lighting_lut; +uniform sampler2D u_tex_luts; uniform uint u_picaRegs[0x200 - 0x48]; @@ -37,6 +35,16 @@ uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48u]; } vec4 tevSources[16]; vec4 tevNextPreviousBuffer; bool tevUnimplementedSourceFlag = false; +vec3 normal; + +// See docs/lighting.md +const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu); + +bool isSamplerEnabled(uint environment_id, uint lut_id) { + uint index = 7 * environment_id + lut_id; + uint arrayIndex = (index >> 5); + return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u; +} // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml @@ -110,7 +118,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add - case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply + case 9u: result.rgb = min(source0.rgb + source1.rgb, 1.0) * source2.rgb; break; // Add then multiply default: break; } @@ -125,7 +133,7 @@ vec4 tevCalculateCombiner(int tev_id) { case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add - case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply + case 9u: result.a = min(source0.a + source1.a, 1.0) * source2.a; break; // Add then multiply default: break; } } @@ -144,10 +152,18 @@ vec4 tevCalculateCombiner(int tev_id) { #define RG_LUT 5u #define RR_LUT 6u -float lutLookup(uint lut, uint light, float value) { - if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; - if (lut == SP_LUT) lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; +#define FOG_INDEX 24 + +uint GPUREG_LIGHTi_CONFIG; +uint GPUREG_LIGHTING_CONFIG1; +uint GPUREG_LIGHTING_LUTINPUT_SELECT; +uint GPUREG_LIGHTING_LUTINPUT_SCALE; +uint GPUREG_LIGHTING_LUTINPUT_ABS; +bool error_unimpl = false; +vec4 unimpl_color = vec4(1.0, 0.0, 1.0, 1.0); + +float lutLookup(uint lut, int index) { + return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r; } vec3 regToColor(uint reg) { @@ -178,136 +194,179 @@ float decodeFP(uint hex, uint E, uint M) { return uintBitsToFloat(hex); } +float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light_vector, vec3 half_vector) { + uint lut_index; + int bit_in_config1; + if (lut_id == SP_LUT) { + // These are the spotlight attenuation LUTs + bit_in_config1 = 8 + int(light_id & 7u); + lut_index = 8u + light_id; + } else if (lut_id <= 6) { + bit_in_config1 = 16 + int(lut_id); + lut_index = lut_id; + } else { + error_unimpl = true; + } + + bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment + + if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) { + return 1.0; + } + + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + float delta = 1.0; + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3); + switch (input_id) { + case 0u: { + delta = dot(normal, normalize(half_vector)); + break; + } + case 1u: { + delta = dot(normalize(v_view), normalize(half_vector)); + break; + } + case 2u: { + delta = dot(normal, normalize(v_view)); + break; + } + case 3u: { + delta = dot(light_vector, normal); + break; + } + case 4u: { + int GPUREG_LIGHTi_SPOTDIR_LOW = int(readPicaReg(0x0146u + (light_id << 4u))); + int GPUREG_LIGHTi_SPOTDIR_HIGH = int(readPicaReg(0x0147u + (light_id << 4u))); + + // Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions + // of GLSL so we do it manually + int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13); + int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13); + int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13); + + if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000; + if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000; + if ((se_z & 0x1000) == 0x1000) se_z |= 0xffffe000; + + // These are fixed point 1.1.11 values, so we need to convert them to float + float x = float(se_x) / 2047.0; + float y = float(se_y) / 2047.0; + float z = float(se_z) / 2047.0; + vec3 spotlight_vector = vec3(x, y, z); + delta = dot(light_vector, spotlight_vector); // spotlight direction is negated so we don't negate light_vector + break; + } + case 5u: { + delta = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + break; + } + default: { + delta = 1.0; + error_unimpl = true; + break; + } + } + + // 0 = enabled + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) { + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) { + delta = max(delta, 0.0); + } else { + delta = abs(delta); + } + int index = int(clamp(floor(delta * 255.0), 0.f, 255.f)); + return lutLookup(lut_index, index) * scale; + } else { + // Range is [-1, 1] so we need to map it to [0, 1] + int index = int(clamp(floor(delta * 128.0), -128.f, 127.f)); + if (index < 0) index += 256; + return lutLookup(lut_index, index) * scale; + } +} + +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + // Implements the following algorthm: https://mathb.in/26766 void calcLighting(out vec4 primary_color, out vec4 secondary_color) { - // Quaternions describe a transformation from surface-local space to eye space. - // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal); - vec3 tangent = normalize(v_tangent); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu); if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) { - primary_color = secondary_color = vec4(1.0); + primary_color = secondary_color = vec4(0.0); return; } - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2u) & 0x7u) + 1u; uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9u); primary_color = vec4(vec3(0.0), 1.0); secondary_color = vec4(vec3(0.0), 1.0); - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3u); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); - uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); - float d[7]; + GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2u); + GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4u); + GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u); + GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u); - bool error_unimpl = false; + uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2); + + // Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker + switch (bump_mode) { + default: { + normal = rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), v_quaternion); + break; + } + } + + vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0); + vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0); + + uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4); + bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u; + + uint light_id; + vec3 light_vector; + vec3 half_vector; for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3); + light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3); - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + 0x10u * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + 0x10u * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + 0x10u * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id); + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u)); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u)); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142u + (light_id << 4u)); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144u + (light_id << 4u)); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145u + (light_id << 4u)); + GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + (light_id << 4u)); - vec3 light_vector = normalize(vec3( + float light_distance; + vec3 light_position = vec3( decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u) - )); - - vec3 half_vector; + ); // Positional Light if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) { - // error_unimpl = true; - half_vector = normalize(normalize(light_vector + v_view) + view); + light_vector = light_position + v_view; } // Directional light else { - half_vector = normalize(normalize(light_vector) + view); + light_vector = light_position; } - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) { - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) scale /= 256.0; + light_distance = length(light_vector); + light_vector = normalize(light_vector); + half_vector = light_vector + normalize(v_view); - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) - d[c] = dot(normal, half_vector); - else if (input_id == 1u) - d[c] = dot(view, half_vector); - else if (input_id == 2u) - d[c] = dot(normal, view); - else if (input_id == 3u) - d[c] = dot(light_vector, normal); - else if (input_id == 4u) { - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); - if (lookup_config == 0u) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1u) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2u) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3u) { - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4u) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5u) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6u) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } - - float distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o - - float NdotL = dot(normal, light_vector); // Li dot N + float NdotL = dot(normal, light_vector); // N dot Li // Two sided diffuse if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) @@ -315,20 +374,86 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) { else NdotL = abs(NdotL); - float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + float geometric_factor; + bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u; + bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u; + if (use_geo_0 || use_geo_1) { + geometric_factor = dot(half_vector, half_vector); + geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0); + } - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); - secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + float distance_attenuation = 1.0; + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) { + uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20); + uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20); + + float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u); + float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u); + + float delta = light_distance * distance_attenuation_scale + distance_attenuation_bias; + delta = clamp(delta, 0.0, 1.0); + int index = int(clamp(floor(delta * 255.0), 0.0, 255.0)); + distance_attenuation = lutLookup(16u + light_id, index); + } + + float spotlight_attenuation = lightLutLookup(environment_id, SP_LUT, light_id, light_vector, half_vector); + float specular0_distribution = lightLutLookup(environment_id, D0_LUT, light_id, light_vector, half_vector); + float specular1_distribution = lightLutLookup(environment_id, D1_LUT, light_id, light_vector, half_vector); + vec3 reflected_color; + reflected_color.r = lightLutLookup(environment_id, RR_LUT, light_id, light_vector, half_vector); + + if (isSamplerEnabled(environment_id, RG_LUT)) { + reflected_color.g = lightLutLookup(environment_id, RG_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.g = reflected_color.r; + } + + if (isSamplerEnabled(environment_id, RB_LUT)) { + reflected_color.b = lightLutLookup(environment_id, RB_LUT, light_id, light_vector, half_vector); + } else { + reflected_color.b = reflected_color.r; + } + + vec3 specular0 = regToColor(GPUREG_LIGHTi_SPECULAR0) * specular0_distribution; + vec3 specular1 = regToColor(GPUREG_LIGHTi_SPECULAR1) * specular1_distribution * reflected_color; + + specular0 *= use_geo_0 ? geometric_factor : 1.0; + specular1 *= use_geo_1 ? geometric_factor : 1.0; + + float clamp_factor = 1.0; + if (clamp_highlights && NdotL == 0.0) { + clamp_factor = 0.0; + } + + float light_factor = distance_attenuation * spotlight_attenuation; + diffuse_sum.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1); } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + // Uses parameters from the last light as Fresnel is only applied to the last light + float fresnel_factor; + + if (fresnel_output1 == 1u || fresnel_output2 == 1u) { + fresnel_factor = lightLutLookup(environment_id, FR_LUT, light_id, light_vector, half_vector); + } + + if (fresnel_output1 == 1u) { + diffuse_sum.a = fresnel_factor; + } - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) { + specular_sum.a = fresnel_factor; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0u); + vec4 global_ambient = vec4(regToColor(GPUREG_LIGHTING_AMBIENT), 1.0); + primary_color = clamp(global_ambient + diffuse_sum, vec4(0.0), vec4(1.0)); + secondary_color = clamp(specular_sum, vec4(0.0), vec4(1.0)); if (error_unimpl) { - // secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + // secondary_color = primary_color = unimpl_color; } } @@ -371,7 +496,7 @@ void main() { if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } - // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // fragColour.rg = texture(u_tex_luts,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -384,6 +509,28 @@ void main() { // Write final fragment depth gl_FragDepth = depth; + bool enable_fog = (textureEnvUpdateBuffer & 7u) == 5u; + + if (enable_fog) { + bool flip_depth = (textureEnvUpdateBuffer & (1u << 16)) != 0u; + float fog_index = flip_depth ? 1.0 - depth : depth; + fog_index *= 128.0; + float clamped_index = clamp(floor(fog_index), 0.0, 127.0); + float delta = fog_index - clamped_index; + vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), FOG_INDEX), 0).rg; + float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0); + + uint GPUREG_FOG_COLOR = readPicaReg(0x00E1u); + + // Annoyingly color is not encoded in the same way as light color + float r = float(GPUREG_FOG_COLOR & 0xFFu); + float g = float((GPUREG_FOG_COLOR >> 8u) & 0xFFu); + float b = float((GPUREG_FOG_COLOR >> 16u) & 0xFFu); + vec3 fog_color = (1.0 / 255.0) * vec3(r, g, b); + + fragColour.rgb = mix(fog_color, fragColour.rgb, fog_factor); + } + // Perform alpha test uint alphaControl = readPicaReg(0x104u); if ((alphaControl & 1u) != 0u) { // Check if alpha test is on diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert index a25d7a6d..057f9a88 100644 --- a/src/host_shaders/opengl_vertex_shader.vert +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -9,9 +9,7 @@ layout(location = 5) in float a_texcoord0_w; layout(location = 6) in vec3 a_view; layout(location = 7) in vec2 a_texcoord2; -out vec3 v_normal; -out vec3 v_tangent; -out vec3 v_bitangent; +out vec4 v_quaternion; out vec4 v_colour; out vec3 v_texcoord0; out vec2 v_texcoord1; @@ -35,12 +33,6 @@ vec4 abgr8888ToVec4(uint abgr) { return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); } -vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); -} - // Convert an arbitrary-width floating point literal to an f32 float decodeFP(uint hex, uint E, uint M) { uint width = M + E + 1u; @@ -73,10 +65,6 @@ void main() { v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); v_view = a_view; - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - for (int i = 0; i < 6; i++) { v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); } @@ -95,4 +83,6 @@ void main() { // There's also another, always-on clipping plane based on vertex z gl_ClipDistance[0] = -a_coords.z; gl_ClipDistance[1] = dot(clipData, a_coords); + + v_quaternion = a_quaternion; } diff --git a/src/hydra_core.cpp b/src/hydra_core.cpp index acbf30a8..078b8a6c 100644 --- a/src/hydra_core.cpp +++ b/src/hydra_core.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -113,7 +114,7 @@ hydra::Size HydraCore::getNativeSize() { return {400, 480}; } void HydraCore::setOutputSize(hydra::Size size) {} void HydraCore::resetContext() { -#ifdef __ANDROID__ +#ifdef USING_GLES if (!gladLoadGLES2Loader(reinterpret_cast(getProcAddress))) { Helpers::panic("OpenGL ES init failed"); } @@ -150,7 +151,7 @@ HC_API const char* getInfo(hydra::InfoType type) { case hydra::InfoType::SystemName: return "Nintendo 3DS"; case hydra::InfoType::Description: return "HLE 3DS emulator. There's a little Alber in your computer and he runs Nintendo 3DS games."; case hydra::InfoType::Author: return "wheremyfoodat (Peach)"; - case hydra::InfoType::Version: return "0.7"; + case hydra::InfoType::Version: return PANDA3DS_VERSION; case hydra::InfoType::License: return "GPLv3"; case hydra::InfoType::Website: return "https://panda3ds.com/"; case hydra::InfoType::Extensions: return "3ds,cci,cxi,app,3dsx,elf,axf"; diff --git a/src/jni_driver.cpp b/src/jni_driver.cpp index e4ce2b39..fbfae8ff 100644 --- a/src/jni_driver.cpp +++ b/src/jni_driver.cpp @@ -8,6 +8,7 @@ #include "renderer_gl/renderer_gl.hpp" #include "services/hid.hpp" #include "android_utils.hpp" +#include "sdl_sensors.hpp" std::unique_ptr emulator = nullptr; HIDService* hidService = nullptr; @@ -43,6 +44,7 @@ extern "C" { AlberFunction(void, functionName) (JNIEnv* env, jobject obj, type value) { emulator->getConfig().settingName = value; } MAKE_SETTING(setShaderJitEnabled, jboolean, shaderJitEnabled) +MAKE_SETTING(setAccurateShaderMulEnable, jboolean, accurateShaderMul) #undef MAKE_SETTING @@ -87,6 +89,7 @@ AlberFunction(void, Finalize)(JNIEnv* env, jobject obj) { emulator = nullptr; hidService = nullptr; renderer = nullptr; + romLoaded = false; } AlberFunction(jboolean, HasRomLoaded)(JNIEnv* env, jobject obj) { return romLoaded; } @@ -110,6 +113,19 @@ AlberFunction(void, TouchScreenUp)(JNIEnv* env, jobject obj) { hidService->relea AlberFunction(void, KeyUp)(JNIEnv* env, jobject obj, jint keyCode) { hidService->releaseKey((u32)keyCode); } AlberFunction(void, KeyDown)(JNIEnv* env, jobject obj, jint keyCode) { hidService->pressKey((u32)keyCode); } +AlberFunction(void, SetGyro)(JNIEnv* env, jobject obj, jfloat roll, jfloat pitch, jfloat yaw) { + auto rotation = Sensors::SDL::convertRotation({ float(roll), float(pitch), float(yaw) }); + hidService->setPitch(s16(rotation.x)); + hidService->setRoll(s16(rotation.y)); + hidService->setYaw(s16(rotation.z)); +} + +AlberFunction(void, SetAccel)(JNIEnv* env, jobject obj, jfloat rawX, jfloat rawY, jfloat rawZ) { + float data[3] = { float(rawX), float(rawY), float(rawZ) }; + auto accel = Sensors::SDL::convertAcceleration(data); + hidService->setAccel(accel.x, accel.y, accel.z); +} + AlberFunction(void, SetCirclepadAxis)(JNIEnv* env, jobject obj, jint x, jint y) { hidService->setCirclepadX((s16)x); hidService->setCirclepadY((s16)y); @@ -139,4 +155,4 @@ int AndroidUtils::openDocument(const char* path, const char* perms) { env->DeleteLocalRef(jmode); return (int)result; -} \ No newline at end of file +} diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp new file mode 100644 index 00000000..ce5fddaf --- /dev/null +++ b/src/libretro_core.cpp @@ -0,0 +1,424 @@ +#include +#include +#include + +#include + +#include +#include +#include + +static retro_environment_t envCallback; +static retro_video_refresh_t videoCallback; +static retro_audio_sample_batch_t audioBatchCallback; +static retro_input_poll_t inputPollCallback; +static retro_input_state_t inputStateCallback; + +static retro_hw_render_callback hwRender; +static std::filesystem::path savePath; + +static bool screenTouched; + +std::unique_ptr emulator; +RendererGL* renderer; + +std::filesystem::path Emulator::getConfigPath() { + return std::filesystem::path(savePath / "config.toml"); +} + +std::filesystem::path Emulator::getAppDataRoot() { + return std::filesystem::path(savePath / "Emulator Files"); +} + +static void* getGLProcAddress(const char* name) { + return (void*)hwRender.get_proc_address(name); +} + +static void videoResetContext() { +#ifdef USING_GLES + if (!gladLoadGLES2Loader(reinterpret_cast(getGLProcAddress))) { + Helpers::panic("OpenGL ES init failed"); + } +#else + if (!gladLoadGLLoader(reinterpret_cast(getGLProcAddress))) { + Helpers::panic("OpenGL init failed"); + } +#endif + + emulator->initGraphicsContext(nullptr); +} + +static void videoDestroyContext() { + emulator->deinitGraphicsContext(); +} + +static bool setHWRender(retro_hw_context_type type) { + hwRender.context_type = type; + hwRender.context_reset = videoResetContext; + hwRender.context_destroy = videoDestroyContext; + hwRender.bottom_left_origin = true; + + switch (type) { + case RETRO_HW_CONTEXT_OPENGL_CORE: + hwRender.version_major = 4; + hwRender.version_minor = 1; + + if (envCallback(RETRO_ENVIRONMENT_SET_HW_RENDER, &hwRender)) { + return true; + } + break; + case RETRO_HW_CONTEXT_OPENGLES3: + case RETRO_HW_CONTEXT_OPENGL: + hwRender.version_major = 3; + hwRender.version_minor = 1; + + if (envCallback(RETRO_ENVIRONMENT_SET_HW_RENDER, &hwRender)) { + return true; + } + break; + default: break; + } + + return false; +} + +static void videoInit() { + retro_hw_context_type preferred = RETRO_HW_CONTEXT_NONE; + envCallback(RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER, &preferred); + + if (preferred && setHWRender(preferred)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGL_CORE)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGL)) return; + if (setHWRender(RETRO_HW_CONTEXT_OPENGLES3)) return; + + hwRender.context_type = RETRO_HW_CONTEXT_NONE; +} + +static bool getButtonState(uint id) { return inputStateCallback(0, RETRO_DEVICE_JOYPAD, 0, id); } +static float getAxisState(uint index, uint id) { return inputStateCallback(0, RETRO_DEVICE_ANALOG, index, id); } + +static void inputInit() { + static const retro_controller_description controllers[] = { + {"Nintendo 3DS", RETRO_DEVICE_JOYPAD}, + {NULL, 0}, + }; + + static const retro_controller_info ports[] = { + {controllers, 1}, + {NULL, 0}, + }; + + envCallback(RETRO_ENVIRONMENT_SET_CONTROLLER_INFO, (void*)ports); + + retro_input_descriptor desc[] = { + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT, "Left"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP, "Up"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN, "Down"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "Right"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A, "A"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B, "B"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT, "Select"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START, "Start"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R, "R"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L, "L"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X, "X"}, + {0, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y, "Y"}, + {0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X, "Circle Pad X"}, + {0, RETRO_DEVICE_ANALOG, RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y, "Circle Pad Y"}, + {0}, + }; + + envCallback(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, &desc); +} + +static std::string fetchVariable(std::string key, std::string def) { + retro_variable var = {nullptr}; + var.key = key.c_str(); + + if (!envCallback(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value == nullptr) { + Helpers::warn("Fetching variable %s failed.", key.c_str()); + return def; + } + + return std::string(var.value); +} + +static int fetchVariableInt(std::string key, int def) { + std::string value = fetchVariable(key, std::to_string(def)); + + if (!value.empty() && std::isdigit(value[0])) { + return std::stoi(value); + } + + return 0; +} + +static bool fetchVariableBool(std::string key, bool def) { + return fetchVariable(key, def ? "enabled" : "disabled") == "enabled"; +} + +static int fetchVariableRange(std::string key, int min, int max) { + return std::clamp(fetchVariableInt(key, min), min, max); +} + +static void configInit() { + static const retro_variable values[] = { + {"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled" : "Enable shader JIT; disabled|enabled"}, + {"panda3ds_accelerate_shaders", + EmulatorConfig::accelerateShadersDefault ? "Run 3DS shaders on the GPU; enabled|disabled" : "Run 3DS shaders on the GPU; disabled|enabled"}, + {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, + {"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled" + : "Use ubershaders (No stutter, maybe slower); disabled|enabled"}, + {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, + {"panda3ds_dsp_emulation", "DSP emulation; HLE|LLE|Null"}, + {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, + {"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"}, + {"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"}, + {"panda3ds_battery_level", "Battery percentage; 5|10|20|30|50|70|90|100"}, + {"panda3ds_use_charger", "Charger plugged; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override", "Force shadergen when rendering lights; enabled|disabled"}, + {"panda3ds_ubershader_lighting_override_threshold", "Light threshold for forcing shadergen; 1|2|3|4|5|6|7|8"}, + {nullptr, nullptr}, + }; + + envCallback(RETRO_ENVIRONMENT_SET_VARIABLES, (void*)values); +} + +static void configUpdate() { + EmulatorConfig& config = emulator->getConfig(); + + config.rendererType = RendererType::OpenGL; + config.vsyncEnabled = fetchVariableBool("panda3ds_use_vsync", true); + config.shaderJitEnabled = fetchVariableBool("panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault); + config.chargerPlugged = fetchVariableBool("panda3ds_use_charger", true); + config.batteryPercentage = fetchVariableRange("panda3ds_battery_level", 5, 100); + config.dspType = Audio::DSPCore::typeFromString(fetchVariable("panda3ds_dsp_emulation", "null")); + config.audioEnabled = fetchVariableBool("panda3ds_use_audio", false); + config.sdCardInserted = fetchVariableBool("panda3ds_use_virtual_sd", true); + config.sdWriteProtected = fetchVariableBool("panda3ds_write_protect_virtual_sd", false); + config.accurateShaderMul = fetchVariableBool("panda3ds_accurate_shader_mul", false); + config.useUbershaders = fetchVariableBool("panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault); + config.accelerateShaders = fetchVariableBool("panda3ds_accelerate_shaders", EmulatorConfig::accelerateShadersDefault); + + config.forceShadergenForLights = fetchVariableBool("panda3ds_ubershader_lighting_override", true); + config.lightShadergenThreshold = fetchVariableRange("panda3ds_ubershader_lighting_override_threshold", 1, 8); + config.discordRpcEnabled = false; + + config.save(); +} + +static void configCheckVariables() { + bool updated = false; + envCallback(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated); + + if (updated) { + configUpdate(); + } +} + +void retro_get_system_info(retro_system_info* info) { + info->need_fullpath = true; + info->valid_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"; + info->library_version = PANDA3DS_VERSION; + info->library_name = "Panda3DS"; + info->block_extract = false; +} + +void retro_get_system_av_info(retro_system_av_info* info) { + info->geometry.base_width = emulator->width; + info->geometry.base_height = emulator->height; + + info->geometry.max_width = info->geometry.base_width; + info->geometry.max_height = info->geometry.base_height; + + info->geometry.aspect_ratio = float(5.0 / 6.0); + info->timing.fps = 60.0; + info->timing.sample_rate = 32768; +} + +void retro_set_environment(retro_environment_t cb) { + envCallback = cb; +} + +void retro_set_video_refresh(retro_video_refresh_t cb) { + videoCallback = cb; +} + +void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { + audioBatchCallback = cb; +} + +void retro_set_audio_sample(retro_audio_sample_t cb) {} + +void retro_set_input_poll(retro_input_poll_t cb) { + inputPollCallback = cb; +} + +void retro_set_input_state(retro_input_state_t cb) { + inputStateCallback = cb; +} + +void retro_init() { + enum retro_pixel_format xrgb888 = RETRO_PIXEL_FORMAT_XRGB8888; + envCallback(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &xrgb888); + + char* saveDir = nullptr; + + if (!envCallback(RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY, &saveDir) || saveDir == nullptr) { + Helpers::warn("No save directory provided by LibRetro."); + savePath = std::filesystem::current_path(); + } else { + savePath = std::filesystem::path(saveDir); + } + + emulator = std::make_unique(); +} + +void retro_deinit() { + emulator = nullptr; +} + +bool retro_load_game(const retro_game_info* game) { + configInit(); + configUpdate(); + + if (emulator->getRendererType() != RendererType::OpenGL) { + Helpers::panic("Libretro: Renderer is not OpenGL"); + } + + renderer = static_cast(emulator->getRenderer()); + emulator->setOutputSize(emulator->width, emulator->height); + + inputInit(); + videoInit(); + + return emulator->loadROM(game->path); +} + +bool retro_load_game_special(uint type, const retro_game_info* info, usize num) { return false; } + +void retro_unload_game() { + renderer->setFBO(0); + renderer = nullptr; +} + +void retro_reset() { + emulator->reset(Emulator::ReloadOption::Reload); +} + +void retro_run() { + configCheckVariables(); + + renderer->setFBO(hwRender.get_current_framebuffer()); + renderer->resetStateManager(); + + inputPollCallback(); + + HIDService& hid = emulator->getServiceManager().getHID(); + + hid.setKey(HID::Keys::A, getButtonState(RETRO_DEVICE_ID_JOYPAD_A)); + hid.setKey(HID::Keys::B, getButtonState(RETRO_DEVICE_ID_JOYPAD_B)); + hid.setKey(HID::Keys::X, getButtonState(RETRO_DEVICE_ID_JOYPAD_X)); + hid.setKey(HID::Keys::Y, getButtonState(RETRO_DEVICE_ID_JOYPAD_Y)); + hid.setKey(HID::Keys::L, getButtonState(RETRO_DEVICE_ID_JOYPAD_L)); + hid.setKey(HID::Keys::R, getButtonState(RETRO_DEVICE_ID_JOYPAD_R)); + hid.setKey(HID::Keys::Start, getButtonState(RETRO_DEVICE_ID_JOYPAD_START)); + hid.setKey(HID::Keys::Select, getButtonState(RETRO_DEVICE_ID_JOYPAD_SELECT)); + hid.setKey(HID::Keys::Up, getButtonState(RETRO_DEVICE_ID_JOYPAD_UP)); + hid.setKey(HID::Keys::Down, getButtonState(RETRO_DEVICE_ID_JOYPAD_DOWN)); + hid.setKey(HID::Keys::Left, getButtonState(RETRO_DEVICE_ID_JOYPAD_LEFT)); + hid.setKey(HID::Keys::Right, getButtonState(RETRO_DEVICE_ID_JOYPAD_RIGHT)); + + // Get analog values for the left analog stick (Right analog stick is N3DS-only and unimplemented) + float xLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_X); + float yLeft = getAxisState(RETRO_DEVICE_INDEX_ANALOG_LEFT, RETRO_DEVICE_ID_ANALOG_Y); + + hid.setCirclepadX((xLeft / +32767) * 0x9C); + hid.setCirclepadY((yLeft / -32767) * 0x9C); + + bool touchScreen = false; + + const int posX = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_X); + const int posY = inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_Y); + + const int newX = static_cast((posX + 0x7fff) / (float)(0x7fff * 2) * emulator->width); + const int newY = static_cast((posY + 0x7fff) / (float)(0x7fff * 2) * emulator->height); + + const int offsetX = 40; + const int offsetY = emulator->height / 2; + + const bool inScreenX = newX >= offsetX && newX <= emulator->width - offsetX; + const bool inScreenY = newY >= offsetY && newY <= emulator->height; + + if (inScreenX && inScreenY) { + touchScreen |= inputStateCallback(0, RETRO_DEVICE_MOUSE, 0, RETRO_DEVICE_ID_MOUSE_LEFT); + touchScreen |= inputStateCallback(0, RETRO_DEVICE_POINTER, 0, RETRO_DEVICE_ID_POINTER_PRESSED); + } + + if (touchScreen) { + u16 x = static_cast(newX - offsetX); + u16 y = static_cast(newY - offsetY); + + hid.setTouchScreenPress(x, y); + screenTouched = true; + } else if (screenTouched) { + hid.releaseTouchScreen(); + screenTouched = false; + } + + hid.updateInputs(emulator->getTicks()); + emulator->runFrame(); + + videoCallback(RETRO_HW_FRAME_BUFFER_VALID, emulator->width, emulator->height, 0); +} + +void retro_set_controller_port_device(uint port, uint device) {} + +usize retro_serialize_size() { + usize size = 0; + return size; +} + +bool retro_serialize(void* data, usize size) { return false; } +bool retro_unserialize(const void* data, usize size) { return false; } + +uint retro_get_region() { return RETRO_REGION_NTSC; } +uint retro_api_version() { return RETRO_API_VERSION; } + +usize retro_get_memory_size(uint id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return Memory::FCRAM_SIZE; + } + + return 0; +} + +void* retro_get_memory_data(uint id) { + if (id == RETRO_MEMORY_SYSTEM_RAM) { + return emulator->getMemory().getFCRAM(); + } + + return nullptr; +} + +void retro_cheat_set(uint index, bool enabled, const char* code) { + std::string cheatCode = std::regex_replace(code, std::regex("[^0-9a-fA-F]"), ""); + std::vector bytes; + + for (usize i = 0; i < cheatCode.size(); i += 2) { + std::string hex = cheatCode.substr(i, 2); + bytes.push_back((u8)std::stoul(hex, nullptr, 16)); + } + + u32 id = emulator->getCheats().addCheat(bytes.data(), bytes.size()); + + if (enabled) { + emulator->getCheats().enableCheat(id); + } else { + emulator->getCheats().disableCheat(id); + } +} + +void retro_cheat_reset() { + emulator->getCheats().reset(); +} diff --git a/src/lua.cpp b/src/lua.cpp index 6a16ab5b..5b78cec2 100644 --- a/src/lua.cpp +++ b/src/lua.cpp @@ -130,6 +130,32 @@ MAKE_MEMORY_FUNCTIONS(32) MAKE_MEMORY_FUNCTIONS(64) #undef MAKE_MEMORY_FUNCTIONS +static int readFloatThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + lua_pushnumber(L, (lua_Number)Helpers::bit_cast(LuaManager::g_emulator->getMemory().read32(vaddr))); + return 1; +} + +static int writeFloatThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + const float value = (float)lua_tonumber(L, 2); + LuaManager::g_emulator->getMemory().write32(vaddr, Helpers::bit_cast(value)); + return 0; +} + +static int readDoubleThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + lua_pushnumber(L, (lua_Number)Helpers::bit_cast(LuaManager::g_emulator->getMemory().read64(vaddr))); + return 1; +} + +static int writeDoubleThunk(lua_State* L) { + const u32 vaddr = (u32)lua_tonumber(L, 1); + const double value = (double)lua_tonumber(L, 2); + LuaManager::g_emulator->getMemory().write64(vaddr, Helpers::bit_cast(value)); + return 0; +} + static int getAppIDThunk(lua_State* L) { std::optional id = LuaManager::g_emulator->getMemory().getProgramID(); @@ -248,10 +274,14 @@ static constexpr luaL_Reg functions[] = { { "__read16", read16Thunk }, { "__read32", read32Thunk }, { "__read64", read64Thunk }, + { "__readFloat", readFloatThunk }, + { "__readDouble", readDoubleThunk }, { "__write8", write8Thunk} , { "__write16", write16Thunk }, { "__write32", write32Thunk }, { "__write64", write64Thunk }, + { "__writeFloat", writeFloatThunk }, + { "__writeDouble", writeDoubleThunk }, { "__getAppID", getAppIDThunk }, { "__pause", pauseThunk }, { "__resume", resumeThunk }, @@ -273,10 +303,15 @@ void LuaManager::initializeThunks() { read16 = function(addr) return GLOBALS.__read16(addr) end, read32 = function(addr) return GLOBALS.__read32(addr) end, read64 = function(addr) return GLOBALS.__read64(addr) end, + readFloat = function(addr) return GLOBALS.__readFloat(addr) end, + readDouble = function(addr) return GLOBALS.__readDouble(addr) end, + write8 = function(addr, value) GLOBALS.__write8(addr, value) end, write16 = function(addr, value) GLOBALS.__write16(addr, value) end, write32 = function(addr, value) GLOBALS.__write32(addr, value) end, write64 = function(addr, value) GLOBALS.__write64(addr, value) end, + writeFloat = function(addr, value) GLOBALS.__writeFloat(addr, value) end, + writeDouble = function(addr, value) GLOBALS.__writeDouble(addr, value) end, getAppID = function() local ffi = require("ffi") diff --git a/src/miniaudio.cpp b/src/miniaudio.cpp index e42fea68..a61979e0 100644 --- a/src/miniaudio.cpp +++ b/src/miniaudio.cpp @@ -1,5 +1,5 @@ // We do not need the ability to be able to encode or decode audio files for the time being -// So we disable said functionality to make the executable smaller +// So we disable said functionality to make the executable smaller. #define MA_NO_DECODING #define MA_NO_ENCODING #define MINIAUDIO_IMPLEMENTATION diff --git a/src/panda_qt/about_window.cpp b/src/panda_qt/about_window.cpp index 67767198..a388dad3 100644 --- a/src/panda_qt/about_window.cpp +++ b/src/panda_qt/about_window.cpp @@ -1,10 +1,13 @@ #include "panda_qt/about_window.hpp" +#include #include #include #include #include +#include "version.hpp" + // Based on https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/DolphinQt/AboutDialog.cpp AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { @@ -17,6 +20,8 @@ AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { QStringLiteral(R"(

Panda3DS

+

v%VERSION_STRING%

+

%ABOUT_PANDA3DS%
%SUPPORT%
@@ -26,6 +31,7 @@ AboutWindow::AboutWindow(QWidget* parent) : QDialog(parent) { %AUTHORS%

)") + .replace(QStringLiteral("%VERSION_STRING%"), PANDA3DS_VERSION) .replace(QStringLiteral("%ABOUT_PANDA3DS%"), tr("Panda3DS is a free and open source Nintendo 3DS emulator, for Windows, MacOS and Linux")) .replace(QStringLiteral("%SUPPORT%"), tr("Visit panda3ds.com for help with Panda3DS and links to our official support sites.")) .replace( diff --git a/src/panda_qt/cheats_window.cpp b/src/panda_qt/cheats_window.cpp index dbd251cc..cc2c94f6 100644 --- a/src/panda_qt/cheats_window.cpp +++ b/src/panda_qt/cheats_window.cpp @@ -1,15 +1,9 @@ #include "panda_qt/cheats_window.hpp" -#include -#include #include -#include -#include -#include -#include -#include -#include +#include #include +#include #include #include "cheats.hpp" @@ -18,71 +12,17 @@ MainWindow* mainWindow = nullptr; -struct CheatMetadata { - u32 handle = Cheats::badCheatHandle; - std::string name = "New cheat"; - std::string code; - bool enabled = true; -}; - void dispatchToMainThread(std::function callback) { - QTimer* timer = new QTimer(); - timer->moveToThread(qApp->thread()); - timer->setSingleShot(true); - QObject::connect(timer, &QTimer::timeout, [=]() - { - callback(); - timer->deleteLater(); - }); - QMetaObject::invokeMethod(timer, "start", Qt::QueuedConnection, Q_ARG(int, 0)); + QTimer* timer = new QTimer(); + timer->moveToThread(qApp->thread()); + timer->setSingleShot(true); + QObject::connect(timer, &QTimer::timeout, [=]() { + callback(); + timer->deleteLater(); + }); + QMetaObject::invokeMethod(timer, "start", Qt::QueuedConnection, Q_ARG(int, 0)); } -class CheatEntryWidget : public QWidget { - public: - CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent); - - void Update() { - name->setText(metadata.name.c_str()); - enabled->setChecked(metadata.enabled); - update(); - } - - void Remove() { - emu->getCheats().removeCheat(metadata.handle); - cheatList->takeItem(cheatList->row(listItem)); - deleteLater(); - } - - const CheatMetadata& getMetadata() { return metadata; } - void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; } - - private: - void checkboxChanged(int state); - void editClicked(); - - Emulator* emu; - CheatMetadata metadata; - u32 handle; - QLabel* name; - QCheckBox* enabled; - QListWidget* cheatList; - QListWidgetItem* listItem; -}; - -class CheatEditDialog : public QDialog { - public: - CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry); - - void accepted(); - void rejected(); - - private: - Emulator* emu; - CheatEntryWidget& cheatEntry; - QTextEdit* codeEdit; - QLineEdit* nameEdit; -}; - CheatEntryWidget::CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent) : QWidget(), emu(emu), metadata(metadata), cheatList(parent) { QHBoxLayout* layout = new QHBoxLayout; @@ -219,7 +159,7 @@ void CheatEditDialog::rejected() { CheatsWindow::CheatsWindow(Emulator* emu, const std::filesystem::path& cheatPath, QWidget* parent) : QWidget(parent, Qt::Window), emu(emu), cheatPath(cheatPath) { - mainWindow = static_cast(parent); + mainWindow = static_cast(parent); QVBoxLayout* layout = new QVBoxLayout; layout->setContentsMargins(6, 6, 6, 6); @@ -265,4 +205,4 @@ void CheatsWindow::removeClicked() { CheatEntryWidget* entry = static_cast(cheatList->itemWidget(item)); entry->Remove(); -} +} \ No newline at end of file diff --git a/src/panda_qt/elided_label.cpp b/src/panda_qt/elided_label.cpp new file mode 100644 index 00000000..f15cf11d --- /dev/null +++ b/src/panda_qt/elided_label.cpp @@ -0,0 +1,25 @@ +#include "panda_qt/elided_label.hpp" + +// Based on https://stackoverflow.com/questions/7381100/text-overflow-for-a-qlabel-s-text-rendering-in-qt +ElidedLabel::ElidedLabel(Qt::TextElideMode elideMode, QWidget* parent) : ElidedLabel("", elideMode, parent) {} + +ElidedLabel::ElidedLabel(QString text, Qt::TextElideMode elideMode, QWidget* parent) : QLabel(parent) { + m_elideMode = elideMode; + setText(text); +} + +void ElidedLabel::setText(QString text) { + m_text = text; + updateText(); +} + +void ElidedLabel::resizeEvent(QResizeEvent* event) { + QLabel::resizeEvent(event); + updateText(); +} + +void ElidedLabel::updateText() { + QFontMetrics metrics(font()); + QString elided = metrics.elidedText(m_text, m_elideMode, width()); + QLabel::setText(elided); +} \ No newline at end of file diff --git a/src/panda_qt/main_window.cpp b/src/panda_qt/main_window.cpp index da9d2706..28a704e6 100644 --- a/src/panda_qt/main_window.cpp +++ b/src/panda_qt/main_window.cpp @@ -7,17 +7,25 @@ #include #include +#include "version.hpp" #include "cheats.hpp" #include "input_mappings.hpp" +#include "sdl_sensors.hpp" #include "services/dsp.hpp" -MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()), screen(this) { +MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent), keyboardMappings(InputMappings::defaultKeyboardMappings()) { setWindowTitle("Alber"); + setWindowIcon(QIcon(":/docs/img/rpog_icon.png")); + // Enable drop events for loading ROMs setAcceptDrops(true); resize(800, 240 * 4); - screen.show(); + // We pass a callback to the screen widget that will be triggered every time we resize the screen + screen = new ScreenWidget([this](u32 width, u32 height) { handleScreenResize(width, height); }, this); + setCentralWidget(screen); + + screen->show(); appRunning = true; // Set our menu bar up @@ -54,24 +62,35 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) auto dumpRomFSAction = toolsMenu->addAction(tr("Dump RomFS")); auto luaEditorAction = toolsMenu->addAction(tr("Open Lua Editor")); auto cheatsEditorAction = toolsMenu->addAction(tr("Open Cheats Editor")); + auto patchWindowAction = toolsMenu->addAction(tr("Open Patch Window")); + auto shaderEditorAction = toolsMenu->addAction(tr("Open Shader Editor")); auto dumpDspFirmware = toolsMenu->addAction(tr("Dump loaded DSP firmware")); connect(dumpRomFSAction, &QAction::triggered, this, &MainWindow::dumpRomFS); - connect(luaEditorAction, &QAction::triggered, this, &MainWindow::openLuaEditor); - connect(cheatsEditorAction, &QAction::triggered, this, &MainWindow::openCheatsEditor); + connect(luaEditorAction, &QAction::triggered, this, [this]() { luaEditor->show(); }); + connect(shaderEditorAction, &QAction::triggered, this, [this]() { shaderEditor->show(); }); + connect(cheatsEditorAction, &QAction::triggered, this, [this]() { cheatsEditor->show(); }); + connect(patchWindowAction, &QAction::triggered, this, [this]() { patchWindow->show(); }); connect(dumpDspFirmware, &QAction::triggered, this, &MainWindow::dumpDspFirmware); auto aboutAction = aboutMenu->addAction(tr("About Panda3DS")); connect(aboutAction, &QAction::triggered, this, &MainWindow::showAboutMenu); emu = new Emulator(); - emu->setOutputSize(screen.surfaceWidth, screen.surfaceHeight); + emu->setOutputSize(screen->surfaceWidth, screen->surfaceHeight); // Set up misc objects aboutWindow = new AboutWindow(nullptr); configWindow = new ConfigWindow(this); cheatsEditor = new CheatsWindow(emu, {}, this); + patchWindow = new PatchWindow(this); luaEditor = new TextEditorWindow(this, "script.lua", ""); + shaderEditor = new ShaderEditorWindow(this, "shader.glsl", ""); + + shaderEditor->setEnable(emu->getRenderer()->supportsShaderReload()); + if (shaderEditor->supported) { + shaderEditor->setText(emu->getRenderer()->getUbershader()); + } auto args = QCoreApplication::arguments(); if (args.size() > 1) { @@ -82,21 +101,42 @@ MainWindow::MainWindow(QApplication* app, QWidget* parent) : QMainWindow(parent) } } + // Handle UI configs before setting up the emulator thread + { + auto& config = emu->getConfig(); + auto& windowSettings = config.windowSettings; + + if (windowSettings.showAppVersion) { + setWindowTitle("Alber v" PANDA3DS_VERSION); + } + + if (windowSettings.rememberPosition) { + setGeometry(windowSettings.x, windowSettings.y, windowSettings.width, config.windowSettings.height); + } + + if (config.printAppVersion) { + printf("Welcome to Panda3DS v%s!\n", PANDA3DS_VERSION); + } + } + // The emulator graphics context for the thread should be initialized in the emulator thread due to how GL contexts work emuThread = std::thread([this]() { const RendererType rendererType = emu->getConfig().rendererType; usingGL = (rendererType == RendererType::OpenGL || rendererType == RendererType::Software || rendererType == RendererType::Null); usingVk = (rendererType == RendererType::Vulkan); + usingMtl = (rendererType == RendererType::Metal); if (usingGL) { // Make GL context current for this thread, enable VSync - GL::Context* glContext = screen.getGLContext(); + GL::Context* glContext = screen->getGLContext(); glContext->MakeCurrent(); glContext->SetSwapInterval(emu->getConfig().vsyncEnabled ? 1 : 0); emu->initGraphicsContext(glContext); } else if (usingVk) { Helpers::panic("Vulkan on Qt is currently WIP, try the SDL frontend instead!"); + } else if (usingMtl) { + Helpers::panic("Metal on Qt currently doesn't work, try the SDL frontend instead!"); } else { Helpers::panic("Unsupported graphics backend for Qt frontend!"); } @@ -134,13 +174,13 @@ void MainWindow::emuThreadMainLoop() { // Unbind GL context if we're using GL, otherwise some setups seem to be unable to join this thread if (usingGL) { - screen.getGLContext()->DoneCurrent(); + screen->getGLContext()->DoneCurrent(); } } void MainWindow::swapEmuBuffer() { if (usingGL) { - screen.getGLContext()->SwapBuffers(); + screen->getGLContext()->SwapBuffers(); } else { Helpers::panic("[Qt] Don't know how to swap buffers for the current rendering backend :("); } @@ -189,14 +229,26 @@ void MainWindow::selectLuaFile() { } } -// Cleanup when the main window closes -MainWindow::~MainWindow() { +// Stop emulator thread when the main window closes +void MainWindow::closeEvent(QCloseEvent *event) { appRunning = false; // Set our running atomic to false in order to make the emulator thread stop, and join it if (emuThread.joinable()) { emuThread.join(); } + // Cache window position/size in config file to restore next time + const QRect& windowGeometry = geometry(); + auto& windowConfig = emu->getConfig().windowSettings; + + windowConfig.x = windowGeometry.x(); + windowConfig.y = windowGeometry.y(); + windowConfig.width = windowGeometry.width(); + windowConfig.height = windowGeometry.height(); +} + +// Cleanup when the main window closes +MainWindow::~MainWindow() { delete emu; delete menuBar; delete aboutWindow; @@ -291,9 +343,6 @@ void MainWindow::showAboutMenu() { about.exec(); } -void MainWindow::openLuaEditor() { luaEditor->show(); } -void MainWindow::openCheatsEditor() { cheatsEditor->show(); } - void MainWindow::dispatchMessage(const EmulatorMessage& message) { switch (message.type) { case MessageType::LoadROM: @@ -347,6 +396,20 @@ void MainWindow::dispatchMessage(const EmulatorMessage& message) { emu->getServiceManager().getHID().setTouchScreenPress(message.touchscreen.x, message.touchscreen.y); break; case MessageType::ReleaseTouchscreen: emu->getServiceManager().getHID().releaseTouchScreen(); break; + + case MessageType::ReloadUbershader: + emu->getRenderer()->setUbershader(*message.string.str); + delete message.string.str; + break; + + case MessageType::SetScreenSize: { + const u32 width = message.screenSize.width; + const u32 height = message.screenSize.height; + + emu->setOutputSize(width, height); + screen->resizeSurface(width, height); + break; + } } } @@ -410,13 +473,13 @@ void MainWindow::keyReleaseEvent(QKeyEvent* event) { void MainWindow::mousePressEvent(QMouseEvent* event) { if (event->button() == Qt::MouseButton::LeftButton) { const QPointF clickPos = event->globalPosition(); - const QPointF widgetPos = screen.mapFromGlobal(clickPos); + const QPointF widgetPos = screen->mapFromGlobal(clickPos); // Press is inside the screen area - if (widgetPos.x() >= 0 && widgetPos.x() < screen.width() && widgetPos.y() >= 0 && widgetPos.y() < screen.height()) { + if (widgetPos.x() >= 0 && widgetPos.x() < screen->width() && widgetPos.y() >= 0 && widgetPos.y() < screen->height()) { // Go from widget positions to [0, 400) for x and [0, 480) for y - uint x = (uint)std::round(widgetPos.x() / screen.width() * 400.f); - uint y = (uint)std::round(widgetPos.y() / screen.height() * 480.f); + uint x = (uint)std::round(widgetPos.x() / screen->width() * 400.f); + uint y = (uint)std::round(widgetPos.y() / screen->height() * 480.f); // Check if touch falls in the touch screen area if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -449,6 +512,14 @@ void MainWindow::loadLuaScript(const std::string& code) { sendMessage(message); } +void MainWindow::reloadShader(const std::string& shader) { + EmulatorMessage message{.type = MessageType::ReloadUbershader}; + + // Make a copy of the code on the heap to send via the message queue + message.string.str = new std::string(shader); + sendMessage(message); +} + void MainWindow::editCheat(u32 handle, const std::vector& cheat, const std::function& callback) { EmulatorMessage message{.type = MessageType::EditCheat}; @@ -461,6 +532,14 @@ void MainWindow::editCheat(u32 handle, const std::vector& cheat, const sendMessage(message); } +void MainWindow::handleScreenResize(u32 width, u32 height) { + EmulatorMessage message{.type = MessageType::SetScreenSize}; + message.screenSize.width = width; + message.screenSize.height = height; + + sendMessage(message); +} + void MainWindow::initControllers() { // Make SDL use consistent positional button mapping SDL_SetHint(SDL_HINT_GAMECONTROLLER_USE_BUTTON_LABELS, "0"); @@ -476,6 +555,8 @@ void MainWindow::initControllers() { SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } } @@ -513,6 +594,8 @@ void MainWindow::pollControllers() { if (gameController == nullptr) { gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -553,6 +636,37 @@ void MainWindow::pollControllers() { } break; } + + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + auto rotation = Sensors::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + hid.setAccel(accel.x, accel.y, accel.z); + } + break; + } } } -} \ No newline at end of file +} + +void MainWindow::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } + + if (haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } +} diff --git a/src/panda_qt/mappings.cpp b/src/panda_qt/mappings.cpp index 22741a73..d41b0a31 100644 --- a/src/panda_qt/mappings.cpp +++ b/src/panda_qt/mappings.cpp @@ -1,7 +1,7 @@ -#include "input_mappings.hpp" - #include +#include "input_mappings.hpp" + InputMappings InputMappings::defaultKeyboardMappings() { InputMappings mappings; mappings.setMapping(Qt::Key_L, HID::Keys::A); diff --git a/src/panda_qt/patch_window.cpp b/src/panda_qt/patch_window.cpp new file mode 100644 index 00000000..189288eb --- /dev/null +++ b/src/panda_qt/patch_window.cpp @@ -0,0 +1,158 @@ +#include "panda_qt/patch_window.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include "hips.hpp" +#include "io_file.hpp" + +PatchWindow::PatchWindow(QWidget* parent) : QWidget(parent, Qt::Window) { + QVBoxLayout* layout = new QVBoxLayout; + layout->setContentsMargins(6, 6, 6, 6); + setLayout(layout); + + QWidget* inputBox = new QWidget; + QHBoxLayout* inputLayout = new QHBoxLayout; + QLabel* inputText = new QLabel(tr("Select input file")); + QPushButton* inputButton = new QPushButton(tr("Select")); + inputPathLabel = new ElidedLabel(""); + inputPathLabel->setFixedWidth(200); + + inputLayout->addWidget(inputText); + inputLayout->addWidget(inputButton); + inputLayout->addWidget(inputPathLabel); + inputBox->setLayout(inputLayout); + + QWidget* patchBox = new QWidget; + QHBoxLayout* patchLayout = new QHBoxLayout; + QLabel* patchText = new QLabel(tr("Select patch file")); + QPushButton* patchButton = new QPushButton(tr("Select")); + patchPathLabel = new ElidedLabel(""); + patchPathLabel->setFixedWidth(200); + + patchLayout->addWidget(patchText); + patchLayout->addWidget(patchButton); + patchLayout->addWidget(patchPathLabel); + patchBox->setLayout(patchLayout); + + QWidget* actionBox = new QWidget; + QHBoxLayout* actionLayout = new QHBoxLayout; + QPushButton* applyPatchButton = new QPushButton(tr("Apply patch")); + actionLayout->addWidget(applyPatchButton); + actionBox->setLayout(actionLayout); + + layout->addWidget(inputBox); + layout->addWidget(patchBox); + layout->addWidget(actionBox); + + connect(inputButton, &QPushButton::clicked, this, [this]() { + auto path = QFileDialog::getOpenFileName(this, tr("Select file to patch"), "", tr("All files (*.*)")); + inputPath = std::filesystem::path(path.toStdU16String()); + + inputPathLabel->setText(path); + }); + + connect(patchButton, &QPushButton::clicked, this, [this]() { + auto path = QFileDialog::getOpenFileName(this, tr("Select patch file"), "", tr("Patch files (*.ips *.ups *.bps)")); + patchPath = std::filesystem::path(path.toStdU16String()); + + patchPathLabel->setText(path); + }); + + connect(applyPatchButton, &QPushButton::clicked, this, [this]() { + if (inputPath.empty() || patchPath.empty()) { + displayMessage(tr("Paths not provided correctly"), tr("Please provide paths for both the input file and the patch file")); + return; + } + + // std::filesystem::path only has += and not + for reasons unknown to humanity + auto defaultPath = inputPath.parent_path() / inputPath.stem(); + defaultPath += "-patched"; + defaultPath += inputPath.extension(); + + auto path = QFileDialog::getSaveFileName(this, tr("Select file"), QString::fromStdU16String(defaultPath.u16string()), tr("All files (*.*)")); + std::filesystem::path outputPath = std::filesystem::path(path.toStdU16String()); + + if (outputPath.empty()) { + displayMessage(tr("No output path"), tr("No path was provided for the output file, no patching was done")); + return; + } + + Hips::PatchType patchType; + auto extension = patchPath.extension(); + + // Figure out what sort of patch we're dealing with + if (extension == ".ips") { + patchType = Hips::PatchType::IPS; + } else if (extension == ".ups") { + patchType = Hips::PatchType::UPS; + } else if (extension == ".bps") { + patchType = Hips::PatchType::BPS; + } else { + displayMessage(tr("Unknown patch format"), tr("Unknown format for patch file. Currently IPS, UPS and BPS are supported")); + return; + } + + // Read input and patch files into buffers + IOFile input(inputPath, "rb"); + IOFile patch(patchPath, "rb"); + + if (!input.isOpen() || !patch.isOpen()) { + displayMessage(tr("Failed to open input files"), tr("Make sure they're in a directory Panda3DS has access to")); + return; + } + + // Read the files into arrays + const auto inputSize = *input.size(); + const auto patchSize = *patch.size(); + + std::unique_ptr inputData(new uint8_t[inputSize]); + std::unique_ptr patchData(new uint8_t[patchSize]); + + input.rewind(); + patch.rewind(); + input.readBytes(inputData.get(), inputSize); + patch.readBytes(patchData.get(), patchSize); + + auto [bytes, result] = Hips::patch(inputData.get(), inputSize, patchData.get(), patchSize, patchType); + + // Write patched file + if (!bytes.empty()) { + IOFile output(outputPath, "wb"); + output.writeBytes(bytes.data(), bytes.size()); + } + + switch (result) { + case Hips::Result::Success: + displayMessage( + tr("Patching Success"), tr("Your file was patched successfully."), QMessageBox::Icon::Information, ":/docs/img/rpog_icon.png" + ); + break; + + case Hips::Result::ChecksumMismatch: + displayMessage( + tr("Checksum mismatch"), + tr("Patch was applied successfully but a checksum mismatch was detected. The input or output files might not be correct") + ); + break; + + default: displayMessage(tr("Patching error"), tr("An error occured while patching")); break; + } + }); +} + +void PatchWindow::PatchWindow::displayMessage(const QString& title, const QString& message, QMessageBox::Icon icon, const char* iconPath) { + QMessageBox messageBox(icon, title, message); + QAbstractButton* button = messageBox.addButton(tr("OK"), QMessageBox::ButtonRole::YesRole); + + if (iconPath != nullptr) { + button->setIcon(QIcon(iconPath)); + } + + messageBox.exec(); +} \ No newline at end of file diff --git a/src/panda_qt/screen.cpp b/src/panda_qt/screen.cpp index 5a254e79..25ff576c 100644 --- a/src/panda_qt/screen.cpp +++ b/src/panda_qt/screen.cpp @@ -18,7 +18,7 @@ // and https://github.com/melonDS-emu/melonDS/blob/master/src/frontend/qt_sdl/main.cpp #ifdef PANDA3DS_ENABLE_OPENGL -ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { +ScreenWidget::ScreenWidget(ResizeCallback resizeCallback, QWidget* parent) : QWidget(parent), resizeCallback(resizeCallback) { // Create a native window for use with our graphics API of choice resize(800, 240 * 4); @@ -35,6 +35,30 @@ ScreenWidget::ScreenWidget(QWidget* parent) : QWidget(parent) { } } +void ScreenWidget::resizeEvent(QResizeEvent* event) { + previousWidth = surfaceWidth; + previousHeight = surfaceHeight; + QWidget::resizeEvent(event); + + // Update surfaceWidth/surfaceHeight following the resize + std::optional windowInfo = getWindowInfo(); + if (windowInfo) { + this->windowInfo = *windowInfo; + } + + // This will call take care of calling resizeSurface from the emulator thread + resizeCallback(surfaceWidth, surfaceHeight); +} + +// Note: This will run on the emulator thread, we don't want any Qt calls happening there. +void ScreenWidget::resizeSurface(u32 width, u32 height) { + if (previousWidth != width || previousHeight != height) { + if (glContext) { + glContext->ResizeSurface(width, height); + } + } +} + bool ScreenWidget::createGLContext() { // List of GL context versions we will try. Anything 4.1+ is good static constexpr std::array versionsToTry = { @@ -45,6 +69,8 @@ bool ScreenWidget::createGLContext() { std::optional windowInfo = getWindowInfo(); if (windowInfo.has_value()) { + this->windowInfo = *windowInfo; + glContext = GL::Context::Create(*getWindowInfo(), versionsToTry); glContext->DoneCurrent(); } @@ -110,4 +136,4 @@ std::optional ScreenWidget::getWindowInfo() { return wi; } -#endif \ No newline at end of file +#endif diff --git a/src/panda_qt/shader_editor.cpp b/src/panda_qt/shader_editor.cpp new file mode 100644 index 00000000..4ca41e22 --- /dev/null +++ b/src/panda_qt/shader_editor.cpp @@ -0,0 +1,55 @@ +#include "panda_qt/shader_editor.hpp" + +#include +#include + +#include "panda_qt/main_window.hpp" + +using namespace Zep; + +ShaderEditorWindow::ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText) + : QDialog(parent), zepWidget(this, qApp->applicationDirPath().toStdString(), fontSize) { + resize(600, 600); + + // Register our extensions + ZepRegressExCommand::Register(zepWidget.GetEditor()); + ZepReplExCommand::Register(zepWidget.GetEditor(), &replProvider); + + // Default to standard mode instead of vim mode, initialize text box + zepWidget.GetEditor().InitWithText(filename, initialText); + zepWidget.GetEditor().SetGlobalMode(Zep::ZepMode_Standard::StaticName()); + + // Layout for widgets + QVBoxLayout* mainLayout = new QVBoxLayout(); + setLayout(mainLayout); + + QPushButton* button = new QPushButton(tr("Reload shader"), this); + button->setFixedSize(100, 20); + + // When the Load Script button is pressed, send the current text to the MainWindow, which will upload it to the emulator's lua object + connect(button, &QPushButton::pressed, this, [this]() { + if (parentWidget()) { + auto buffer = zepWidget.GetEditor().GetMRUBuffer(); + const std::string text = buffer->GetBufferText(buffer->Begin(), buffer->End()); + + static_cast(parentWidget())->reloadShader(text); + } else { + // This should be unreachable, only here for safety purposes + printf("Text editor does not have any parent widget, click doesn't work :(\n"); + } + }); + + mainLayout->addWidget(button); + mainLayout->addWidget(&zepWidget); +} + +void ShaderEditorWindow::setEnable(bool enable) { + supported = enable; + + if (enable) { + setDisabled(false); + } else { + setDisabled(true); + setText("Shader editor window is not available for this renderer backend"); + } +} diff --git a/src/panda_sdl/frontend_sdl.cpp b/src/panda_sdl/frontend_sdl.cpp index 0c78eea1..01f48acd 100644 --- a/src/panda_sdl/frontend_sdl.cpp +++ b/src/panda_sdl/frontend_sdl.cpp @@ -2,6 +2,10 @@ #include +#include "renderdoc.hpp" +#include "sdl_sensors.hpp" +#include "version.hpp" + FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMappings()) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { Helpers::panic("Failed to initialize SDL2"); @@ -20,6 +24,8 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp SDL_Joystick* stick = SDL_GameControllerGetJoystick(gameController); gameControllerID = SDL_JoystickInstanceID(stick); } + + setupControllerSensors(gameController); } const EmulatorConfig& config = emu.getConfig(); @@ -29,13 +35,35 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp needOpenGL = needOpenGL || (config.rendererType == RendererType::OpenGL); #endif + const char* windowTitle = config.windowSettings.showAppVersion ? ("Alber v" PANDA3DS_VERSION) : "Alber"; + if (config.printAppVersion) { + printf("Welcome to Panda3DS v%s!\n", PANDA3DS_VERSION); + } + + // Positions of the window + int windowX, windowY; + + // Apply window size settings if the appropriate option is enabled + if (config.windowSettings.rememberPosition) { + windowX = config.windowSettings.x; + windowY = config.windowSettings.y; + windowWidth = config.windowSettings.width; + windowHeight = config.windowSettings.height; + } else { + windowX = SDL_WINDOWPOS_CENTERED; + windowY = SDL_WINDOWPOS_CENTERED; + windowWidth = 400; + windowHeight = 480; + } + emu.setOutputSize(windowWidth, windowHeight); + if (needOpenGL) { // Demand 3.3 core for software renderer, or 4.1 core for OpenGL renderer (max available on MacOS) // MacOS gets mad if we don't explicitly demand a core profile SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 3 : 1); - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::panic("Window creation failed: %s", SDL_GetError()); @@ -55,7 +83,17 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp #ifdef PANDA3DS_ENABLE_VULKAN if (config.rendererType == RendererType::Vulkan) { - window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_VULKAN); + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_VULKAN | SDL_WINDOW_RESIZABLE); + + if (window == nullptr) { + Helpers::warn("Window creation failed: %s", SDL_GetError()); + } + } +#endif + +#ifdef PANDA3DS_ENABLE_METAL + if (config.rendererType == RendererType::Metal) { + window = SDL_CreateWindow(windowTitle, windowX, windowY, windowWidth, windowHeight, SDL_WINDOW_METAL | SDL_WINDOW_RESIZABLE); if (window == nullptr) { Helpers::warn("Window creation failed: %s", SDL_GetError()); @@ -87,10 +125,16 @@ void FrontendSDL::run() { namespace Keys = HID::Keys; switch (event.type) { - case SDL_QUIT: + case SDL_QUIT: { printf("Bye :(\n"); programRunning = false; + + // Remember window position & size for future runs + auto& windowSettings = emu.getConfig().windowSettings; + SDL_GetWindowPosition(window, &windowSettings.x, &windowSettings.y); + SDL_GetWindowSize(window, &windowSettings.width, &windowSettings.height); return; + } case SDL_KEYDOWN: { if (emu.romType == ROMType::None) break; @@ -130,6 +174,14 @@ void FrontendSDL::run() { emu.reset(Emulator::ReloadOption::Reload); break; } + + case SDLK_F11: { + if constexpr (Renderdoc::isSupported()) { + Renderdoc::triggerCapture(); + } + + break; + } } } break; @@ -162,8 +214,13 @@ void FrontendSDL::run() { if (emu.romType == ROMType::None) break; if (event.button.button == SDL_BUTTON_LEFT) { - const s32 x = event.button.x; - const s32 y = event.button.y; + if (windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } + + // Go from window positions to [0, 400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.button.x * 400.f / windowWidth); + const s32 y = (s32)std::round(event.button.y * 480.f / windowHeight); // Check if touch falls in the touch screen area if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -195,6 +252,8 @@ void FrontendSDL::run() { if (gameController == nullptr) { gameController = SDL_GameControllerOpen(event.cdevice.which); gameControllerID = event.cdevice.which; + + setupControllerSensors(gameController); } break; @@ -242,8 +301,13 @@ void FrontendSDL::run() { // Handle "dragging" across the touchscreen if (hid.isTouchScreenPressed()) { - const s32 x = event.motion.x; - const s32 y = event.motion.y; + if (windowWidth == 0 || windowHeight == 0) [[unlikely]] { + break; + } + + // Go from window positions to [0, 400) for x and [0, 480) for y + const s32 x = (s32)std::round(event.motion.x * 400.f / windowWidth); + const s32 y = (s32)std::round(event.motion.y * 480.f / windowHeight); // Check if touch falls in the touch screen area and register the new touch screen position if (y >= 240 && y <= 480 && x >= 40 && x < 40 + 320) { @@ -271,6 +335,24 @@ void FrontendSDL::run() { break; } + case SDL_CONTROLLERSENSORUPDATE: { + if (event.csensor.sensor == SDL_SENSOR_GYRO) { + auto rotation = Sensors::SDL::convertRotation({ + event.csensor.data[0], + event.csensor.data[1], + event.csensor.data[2], + }); + + hid.setPitch(s16(rotation.x)); + hid.setRoll(s16(rotation.y)); + hid.setYaw(s16(rotation.z)); + } else if (event.csensor.sensor == SDL_SENSOR_ACCEL) { + auto accel = Sensors::SDL::convertAcceleration(event.csensor.data); + hid.setAccel(accel.x, accel.y, accel.z); + } + break; + } + case SDL_DROPFILE: { char* droppedDir = event.drop.file; @@ -289,6 +371,15 @@ void FrontendSDL::run() { } break; } + + case SDL_WINDOWEVENT: { + auto type = event.window.event; + if (type == SDL_WINDOWEVENT_RESIZED) { + windowWidth = event.window.data1; + windowHeight = event.window.data2; + emu.setOutputSize(windowWidth, windowHeight); + } + } } } @@ -323,3 +414,16 @@ void FrontendSDL::run() { SDL_GL_SwapWindow(window); } } + +void FrontendSDL::setupControllerSensors(SDL_GameController* controller) { + bool haveGyro = SDL_GameControllerHasSensor(controller, SDL_SENSOR_GYRO) == SDL_TRUE; + bool haveAccelerometer = SDL_GameControllerHasSensor(controller, SDL_SENSOR_ACCEL) == SDL_TRUE; + + if (haveGyro) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_GYRO, SDL_TRUE); + } + + if (haveAccelerometer) { + SDL_GameControllerSetSensorEnabled(controller, SDL_SENSOR_ACCEL, SDL_TRUE); + } +} diff --git a/src/pandroid/app/src/main/ic_launcher-playstore.png b/src/pandroid/app/src/main/ic_launcher-playstore.png new file mode 100644 index 00000000..04b65284 Binary files /dev/null and b/src/pandroid/app/src/main/ic_launcher-playstore.png differ diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java index f7a3394b..bb3945b5 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/AlberDriver.java @@ -24,13 +24,16 @@ public class AlberDriver { public static native void KeyUp(int code); public static native void SetCirclepadAxis(int x, int y); public static native void TouchScreenUp(); - public static native void TouchScreenDown(int x, int y); + public static native void TouchScreenDown(int x, int y);; + public static native void SetGyro(float roll, float pitch, float yaw); + public static native void SetAccel(float x, float y, float z); public static native void Pause(); public static native void Resume(); public static native void LoadLuaScript(String script); public static native byte[] GetSmdh(); public static native void setShaderJitEnabled(boolean enable); + public static native void setAccurateShaderMulEnable(boolean enable); public static int openDocument(String path, String mode) { try { diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java index 83d18d99..503684ac 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/GameActivity.java @@ -3,11 +3,22 @@ package com.panda3ds.pandroid.app; import android.app.ActivityManager; import android.app.PictureInPictureParams; import android.content.Intent; +import android.content.res.Configuration; +import android.hardware.Sensor; +import android.hardware.SensorEvent; +import android.hardware.SensorEventListener; +import android.hardware.SensorManager; +import android.opengl.Matrix; import android.os.Build; import android.os.Bundle; +import android.renderscript.Matrix3f; +import android.renderscript.Matrix4f; +import android.util.Log; import android.util.Rational; +import android.view.Display; import android.view.KeyEvent; import android.view.MotionEvent; +import android.view.Surface; import android.view.View; import android.view.ViewGroup; import android.view.WindowManager; @@ -25,6 +36,7 @@ import com.panda3ds.pandroid.app.game.EmulatorCallback; import com.panda3ds.pandroid.data.config.GlobalConfig; import com.panda3ds.pandroid.input.InputHandler; import com.panda3ds.pandroid.input.InputMap; +import com.panda3ds.pandroid.math.Vector3; import com.panda3ds.pandroid.utils.Constants; import com.panda3ds.pandroid.view.PandaGlSurfaceView; import com.panda3ds.pandroid.view.PandaLayoutController; @@ -32,7 +44,7 @@ import com.panda3ds.pandroid.view.ds.DsLayoutManager; import com.panda3ds.pandroid.view.renderer.ConsoleRenderer; import com.panda3ds.pandroid.view.utils.PerformanceView; -public class GameActivity extends BaseActivity implements EmulatorCallback { +public class GameActivity extends BaseActivity implements EmulatorCallback, SensorEventListener { private final DrawerFragment drawerFragment = new DrawerFragment(); private final AlberInputListener inputListener = new AlberInputListener(this); private ConsoleRenderer renderer; @@ -74,6 +86,19 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { ((FrameLayout) findViewById(R.id.panda_gl_frame)).addView(view, new FrameLayout.LayoutParams(ViewGroup.LayoutParams.WRAP_CONTENT, ViewGroup.LayoutParams.WRAP_CONTENT)); } swapScreens(GlobalConfig.get(GlobalConfig.KEY_CURRENT_DS_LAYOUT)); + registerSensors(); + } + + private void registerSensors() { + SensorManager sensorManager = (SensorManager) getSystemService(SENSOR_SERVICE); + Sensor accel = sensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER); + if (accel != null) { + sensorManager.registerListener(this, accel, 1); + } + Sensor gryro = sensorManager.getDefaultSensor(Sensor.TYPE_GYROSCOPE); + if (gryro != null) { + sensorManager.registerListener(this, gryro, 1); + } } private void changeOverlayVisibility(boolean visible) { @@ -85,7 +110,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { @Override protected void onResume() { super.onResume(); - getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); + getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); getWindow().getDecorView().setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN | View.SYSTEM_UI_FLAG_HIDE_NAVIGATION); getWindow().addFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN); InputHandler.reset(); @@ -94,6 +119,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) { getTheme().applyStyle(R.style.GameActivityNavigationBar, true); } + registerSensors(); } private void enablePIP() { @@ -113,6 +139,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { protected void onPause() { super.onPause(); + ((SensorManager)getSystemService(SENSOR_SERVICE)).unregisterListener(this); InputHandler.reset(); if (GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE)) { if (Build.VERSION.SDK_INT > Build.VERSION_CODES.O) { @@ -174,10 +201,48 @@ public class GameActivity extends BaseActivity implements EmulatorCallback { @Override protected void onDestroy() { + ((SensorManager)getSystemService(SENSOR_SERVICE)).unregisterListener(this); if (AlberDriver.HasRomLoaded()) { AlberDriver.Finalize(); } super.onDestroy(); } + + private float getDeviceRotationAngle() { + if (getWindow().getDecorView() == null || getWindow().getDecorView().getDisplay() == null) + return 0.0f; + + int rotation = getWindow().getDecorView().getDisplay().getRotation(); + switch (rotation) { + case Surface.ROTATION_90: return 90.0f; + case Surface.ROTATION_180: return 180.0f; + case Surface.ROTATION_270: return -90.0f; + default: return 0.0f; + } + } + + @Override + public void onSensorChanged(SensorEvent event) { + if (AlberDriver.HasRomLoaded()) { + Sensor sensor = event.sensor; + switch (sensor.getType()) { + case Sensor.TYPE_ACCELEROMETER: { + float[] values = event.values; + Vector3 vec3 = new Vector3(values[0], values[1], values[2]); + vec3.rotateByEuler(new Vector3(0, 0, (float) (getDeviceRotationAngle() * (Math.PI / 180.0f)))); + AlberDriver.SetAccel(vec3.x, vec3.y, vec3.z); + } break; + case Sensor.TYPE_GYROSCOPE: { + float[] values = event.values; + Vector3 vec3 = new Vector3(values[0], values[1], values[2]); + vec3.rotateByEuler(new Vector3(0, 0, (float) (getDeviceRotationAngle() * (Math.PI / 180.0f)))); + AlberDriver.SetGyro(vec3.x, vec3.y, vec3.z); + } break; + } + } + } + + @Override + public void onAccuracyChanged(Sensor sensor, int accuracy) {} } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java index 9426c098..ae8d49ad 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/base/BasePreferenceFragment.java @@ -26,6 +26,10 @@ public abstract class BasePreferenceFragment extends PreferenceFragmentCompat { ((SwitchPreferenceCompat)findPreference(id)).setChecked(value); } + protected void setSummaryValue(String id,String text) { + findPreference(id).setSummary(text); + } + protected void setActivityTitle(@StringRes int titleId) { ActionBar header = ((AppCompatActivity) requireActivity()).getSupportActionBar(); if (header != null) { diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java index 176bab14..8d04403e 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/AdvancedPreferences.java @@ -22,6 +22,7 @@ public class AdvancedPreferences extends BasePreferenceFragment { setItemClick("performanceMonitor", pref -> GlobalConfig.set(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY, ((SwitchPreferenceCompat) pref).isChecked())); setItemClick("shaderJit", pref -> GlobalConfig.set(GlobalConfig.KEY_SHADER_JIT, ((SwitchPreferenceCompat) pref).isChecked())); + setItemClick("accurateShaderMul", pref -> GlobalConfig.set(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY, ((SwitchPreferenceCompat) pref).isChecked())); setItemClick("loggerService", pref -> { boolean checked = ((SwitchPreferenceCompat) pref).isChecked(); Context ctx = PandroidApplication.getAppContext(); @@ -46,5 +47,6 @@ public class AdvancedPreferences extends BasePreferenceFragment { ((SwitchPreferenceCompat) findPreference("performanceMonitor")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY)); ((SwitchPreferenceCompat) findPreference("loggerService")).setChecked(GlobalConfig.get(GlobalConfig.KEY_LOGGER_SERVICE)); ((SwitchPreferenceCompat) findPreference("shaderJit")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT)); + ((SwitchPreferenceCompat) findPreference("accurateShaderMul")).setChecked(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY)); } } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java index 0b003db9..86182c3b 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/GeneralPreferences.java @@ -1,7 +1,13 @@ package com.panda3ds.pandroid.app.preferences; +import android.net.Uri; import android.os.Bundle; +import android.util.Log; +import android.widget.Toast; +import androidx.activity.result.ActivityResultCallback; +import androidx.activity.result.ActivityResultLauncher; +import androidx.activity.result.contract.ActivityResultContracts; import androidx.annotation.Nullable; import androidx.preference.SwitchPreferenceCompat; @@ -10,8 +16,11 @@ import com.panda3ds.pandroid.app.PreferenceActivity; import com.panda3ds.pandroid.app.base.BasePreferenceFragment; import com.panda3ds.pandroid.app.preferences.screen_editor.ScreenLayoutsPreference; import com.panda3ds.pandroid.data.config.GlobalConfig; +import com.panda3ds.pandroid.utils.FileUtils; -public class GeneralPreferences extends BasePreferenceFragment { +public class GeneralPreferences extends BasePreferenceFragment implements ActivityResultCallback { + private final ActivityResultContracts.OpenDocument openFolderContract = new ActivityResultContracts.OpenDocument(); + private ActivityResultLauncher pickFileRequest; @Override public void onCreatePreferences(@Nullable Bundle savedInstanceState, @Nullable String rootKey) { setPreferencesFromResource(R.xml.general_preference, rootKey); @@ -21,6 +30,11 @@ public class GeneralPreferences extends BasePreferenceFragment { setItemClick("behavior.pictureInPicture", (pref)-> GlobalConfig.set(GlobalConfig.KEY_PICTURE_IN_PICTURE, ((SwitchPreferenceCompat)pref).isChecked())); setActivityTitle(R.string.general); refresh(); + + setItemClick("games.aes_key", pref -> pickFileRequest.launch(new String[]{ "text/plain" })); + setItemClick("games.seed_db", pref -> pickFileRequest.launch(new String[]{ "application/octet-stream" })); + + pickFileRequest = registerForActivityResult(openFolderContract, this); } @Override @@ -31,5 +45,45 @@ public class GeneralPreferences extends BasePreferenceFragment { private void refresh() { setSwitchValue("behavior.pictureInPicture", GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE)); + setSummaryValue("games.aes_key", String.format(getString(FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/aes_keys.txt") ? R.string.file_available : R.string.file_not_available), "aes_keys.txt")); + setSummaryValue("games.seed_db", String.format(getString(FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/seeddb.bin") ? R.string.file_available : R.string.file_not_available), "seeddb.bin")); } + + @Override + public void onDestroy() { + super.onDestroy(); + if (pickFileRequest != null) { + pickFileRequest.unregister(); + pickFileRequest = null; + } + } + + @Override + public void onActivityResult(Uri result) { + if (result != null) { + String path = result.toString(); + Log.w("File", path + " -> " + FileUtils.getName(path)); + switch (String.valueOf(FileUtils.getName(path))) { + case "aes_keys.txt": + case "seeddb.bin": { + String name = FileUtils.getName(path); + if (FileUtils.getLength(path) < 1024 * 256) { + String sysdataFolder = FileUtils.getPrivatePath() + "/sysdata"; + if (!FileUtils.exists(sysdataFolder)) { + FileUtils.createDir(FileUtils.getPrivatePath(), "sysdata"); + } + if (FileUtils.exists(sysdataFolder + "/" + name)) { + FileUtils.delete(sysdataFolder + "/" + name); + } + FileUtils.copyFile(path, FileUtils.getPrivatePath() + "/sysdata/", name); + Toast.makeText(getActivity(), String.format(getString(R.string.file_imported), name), Toast.LENGTH_LONG).show(); + } else { + Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show(); + } + } break; + default: Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show(); break; + } + refresh(); + } + } } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java index 4bc6e299..14c4e576 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/preferences/screen_editor/ScreenEditorPreference.java @@ -23,7 +23,7 @@ public class ScreenEditorPreference extends Fragment { @Override public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) { layout = new LinearLayout(container.getContext()); - layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_HIDE_NAVIGATION|View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE); + layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE); return layout; } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java index ca6fad90..397eef05 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/app/provider/AppDataDocumentProvider.java @@ -95,7 +95,7 @@ public class AppDataDocumentProvider extends DocumentsProvider { private void includeFile(MatrixCursor cursor, File file) { int flags = 0; if (file.isDirectory()) { - flags = Document.FLAG_DIR_SUPPORTS_CREATE; + flags = Document.FLAG_DIR_SUPPORTS_CREATE | Document.FLAG_SUPPORTS_DELETE; } else { flags = Document.FLAG_SUPPORTS_WRITE | Document.FLAG_SUPPORTS_REMOVE | Document.FLAG_SUPPORTS_DELETE; } diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java index 21645b7e..c8750f88 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/data/config/GlobalConfig.java @@ -21,7 +21,8 @@ public class GlobalConfig { public static DataModel data; - public static final Key KEY_SHADER_JIT = new Key<>("emu.shader_jit", false); + public static final Key KEY_SHADER_JIT = new Key<>("emu.shader_jit", true); + public static final Key KEY_ACCURATE_SHADER_MULTIPLY = new Key<>("emu.accurate_shader_mul", false); public static final Key KEY_PICTURE_IN_PICTURE = new Key<>("app.behavior.pictureInPicture", false); public static final Key KEY_SHOW_PERFORMANCE_OVERLAY = new Key<>("dev.performanceOverlay", false); public static final Key KEY_LOGGER_SERVICE = new Key<>("dev.loggerService", false); diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java new file mode 100644 index 00000000..7c485c6c --- /dev/null +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Quaternion.java @@ -0,0 +1,31 @@ +package com.panda3ds.pandroid.math; + +public class Quaternion { + public float x, y, z, w; + public Quaternion(float x, float y, float z, float w) { + this.x = x; + this.y = y; + this.z = z; + this.w = w; + } + + public Quaternion fromEuler(Vector3 euler) { + float x = euler.x; + float y = euler.y; + float z = euler.z; + + double c1 = Math.cos(x / 2.0); + double c2 = Math.cos(y / 2.0); + double c3 = Math.cos(z / 2.0); + + double s1 = Math.sin(x / 2.0); + double s2 = Math.sin(y / 2.0); + double s3 = Math.sin(z / 2.0); + + this.x = (float) (s1 * c2 * c3 + c1 * s2 * s3); + this.y = (float) (c1 * s2 * c3 - s1 * c2 * s3); + this.z = (float) (c1 * c2 * s3 + s1 * s2 * c3); + this.w = (float) (c1 * c2 * c3 - s1 * s2 * s3); + return this; + } +} diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java new file mode 100644 index 00000000..055972ec --- /dev/null +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/math/Vector3.java @@ -0,0 +1,32 @@ +package com.panda3ds.pandroid.math; + +public class Vector3 { + private final Quaternion quaternion = new Quaternion(0, 0, 0, 0); + public float x, y, z; + + public Vector3(float x, float y, float z) { + this.x = x; + this.y = y; + this.z = z; + } + + public Vector3 rotateByEuler(Vector3 euler) { + this.quaternion.fromEuler(euler); + + float x = this.x, y = this.y, z = this.z; + float qx = this.quaternion.x; + float qy = this.quaternion.y; + float qz = this.quaternion.z; + float qw = this.quaternion.w; + + float ix = qw * x + qy * z - qz * y; + float iy = qw * y + qz * x - qx * z; + float iz = qw * z + qx * y - qy * x; + float iw = -qx * x - qy * qz * z; + + this.x = ix * qw + iw * -qx + iy * -qz - iz * -qy; + this.y = iy * qw + iw * -qy + iz * -qx - ix * -qz; + this.z = iz * qw + iw * -qz + ix * -qy - iy * -qx; + return this; + } +} diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java index 85245454..26b029d9 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/utils/FileUtils.java @@ -230,6 +230,10 @@ public class FileUtils { return parseFile(path).lastModified(); } + public static long getLength(String path) { + return parseFile(path).length(); + } + public static String[] listFiles(String path) { DocumentFile folder = parseFile(path); DocumentFile[] files = folder.listFiles(); diff --git a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java index c57421ab..3fb435b4 100644 --- a/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java +++ b/src/pandroid/app/src/main/java/com/panda3ds/pandroid/view/PandaGlRenderer.java @@ -93,6 +93,7 @@ public class PandaGlRenderer implements GLSurfaceView.Renderer, ConsoleRenderer AlberDriver.Initialize(); AlberDriver.setShaderJitEnabled(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT)); + AlberDriver.setAccurateShaderMulEnable(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY)); // If loading the ROM failed, display an error message and early exit if (!AlberDriver.LoadRom(romPath)) { diff --git a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml index 6f3b755b..036d09bc 100644 --- a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml +++ b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -1,6 +1,5 @@ - - - + + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml index 6f3b755b..036d09bc 100644 --- a/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml +++ b/src/pandroid/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -1,6 +1,5 @@ - - - + + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp index c209e78e..61d2084a 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..9cae8387 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp index b2dfe3d1..c3c7e5a2 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp index 4f0f1d64..3f5d5d42 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..3c486012 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp index 62b611da..b11dd67f 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp index 948a3070..eebe375d 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..aef35be5 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp index 1b9a6956..a52a83b3 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp index 28d4b77f..9368d16c 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..95242200 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp index 9287f508..56373697 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp index aa7d6427..408caf75 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..df4f2156 Binary files /dev/null and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp differ diff --git a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp index 9126ae37..9c7b64f0 100644 Binary files a/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp and b/src/pandroid/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml b/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml index f2e144c3..521f199e 100644 --- a/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml +++ b/src/pandroid/app/src/main/res/values-pt-rBR/strings.xml @@ -90,4 +90,12 @@ Comportamento Jogo invalido Ferramentas + Multiplicação precisa de shader + Usar calculos mais precisos para shaders + Importar chaves + %s disponível + %s não disponível + Importar SeedDB + Arquivo inválido + %s Importado diff --git a/src/pandroid/app/src/main/res/values/ic_launcher_background.xml b/src/pandroid/app/src/main/res/values/ic_launcher_background.xml new file mode 100644 index 00000000..28382334 --- /dev/null +++ b/src/pandroid/app/src/main/res/values/ic_launcher_background.xml @@ -0,0 +1,4 @@ + + + #C45F5F + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/values/strings.xml b/src/pandroid/app/src/main/res/values/strings.xml index 25569528..63a6c246 100644 --- a/src/pandroid/app/src/main/res/values/strings.xml +++ b/src/pandroid/app/src/main/res/values/strings.xml @@ -96,4 +96,12 @@ Taiwan Behavior Invalid game + Accurate shader multiplication + Can improve rendering at a small performance loss + Import keys + %s imported + %s available + %s not available + Import SeedDB + Invalid file diff --git a/src/pandroid/app/src/main/res/xml/advanced_preferences.xml b/src/pandroid/app/src/main/res/xml/advanced_preferences.xml index 6602fdfd..9ef81dbf 100644 --- a/src/pandroid/app/src/main/res/xml/advanced_preferences.xml +++ b/src/pandroid/app/src/main/res/xml/advanced_preferences.xml @@ -28,5 +28,11 @@ app:summary="@string/pref_shader_jit_summary" app:iconSpaceReserved="false"/> + + \ No newline at end of file diff --git a/src/pandroid/app/src/main/res/xml/general_preference.xml b/src/pandroid/app/src/main/res/xml/general_preference.xml index 3e2d93c8..4352ee54 100644 --- a/src/pandroid/app/src/main/res/xml/general_preference.xml +++ b/src/pandroid/app/src/main/res/xml/general_preference.xml @@ -23,6 +23,16 @@ app:title="@string/pref_game_folders" app:summary="@string/pref_game_folders_summary" app:iconSpaceReserved="false"/> + + + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include +#include + +namespace Renderdoc { + enum class CaptureState { + Idle, + Triggered, + InProgress, + }; + + static CaptureState captureState{CaptureState::Idle}; + RENDERDOC_API_1_6_0* rdocAPI{}; + + void loadRenderdoc() { +#ifdef WIN32 + // Check if we are running in Renderdoc GUI + HMODULE mod = GetModuleHandleA("renderdoc.dll"); + if (!mod) { + // If enabled in config, try to load RDoc runtime in offline mode + HKEY h_reg_key; + LONG result = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Classes\\RenderDoc.RDCCapture.1\\DefaultIcon\\", 0, KEY_READ, &h_reg_key); + if (result != ERROR_SUCCESS) { + return; + } + std::array keyString{}; + DWORD stringSize{keyString.size()}; + result = RegQueryValueExW(h_reg_key, L"", 0, NULL, (LPBYTE)keyString.data(), &stringSize); + if (result != ERROR_SUCCESS) { + return; + } + + std::filesystem::path path{keyString.cbegin(), keyString.cend()}; + path = path.parent_path().append("renderdoc.dll"); + const auto path_to_lib = path.generic_string(); + mod = LoadLibraryA(path_to_lib.c_str()); + } + + if (mod) { + const auto RENDERDOC_GetAPI = reinterpret_cast(GetProcAddress(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdocAPI); + + if (ret != 1) { + Helpers::panic("Invalid return value from RENDERDOC_GetAPI"); + } + } +#else +#ifdef ANDROID + static constexpr const char RENDERDOC_LIB[] = "libVkLayer_GLES_RenderDoc.so"; +#else + static constexpr const char RENDERDOC_LIB[] = "librenderdoc.so"; +#endif + if (void* mod = dlopen(RENDERDOC_LIB, RTLD_NOW | RTLD_NOLOAD)) { + const auto RENDERDOC_GetAPI = reinterpret_cast(dlsym(mod, "RENDERDOC_GetAPI")); + const s32 ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void**)&rdocAPI); + + if (ret != 1) { + Helpers::panic("Invalid return value from RENDERDOC_GetAPI"); + } + } +#endif + if (rdocAPI) { + // Disable default capture keys as they suppose to trigger present-to-present capturing + // and it is not what we want + rdocAPI->SetCaptureKeys(nullptr, 0); + + // Also remove rdoc crash handler + rdocAPI->UnloadCrashHandler(); + } + } + + void startCapture() { + if (!rdocAPI) { + return; + } + + if (captureState == CaptureState::Triggered) { + rdocAPI->StartFrameCapture(nullptr, nullptr); + captureState = CaptureState::InProgress; + } + } + + void endCapture() { + if (!rdocAPI) { + return; + } + + if (captureState == CaptureState::InProgress) { + rdocAPI->EndFrameCapture(nullptr, nullptr); + captureState = CaptureState::Idle; + } + } + + void triggerCapture() { + if (captureState == CaptureState::Idle) { + captureState = CaptureState::Triggered; + } + } + + void setOutputDir(const std::string& path, const std::string& prefix) { + if (rdocAPI) { + rdocAPI->SetCaptureFilePathTemplate((path + '\\' + prefix).c_str()); + } + } +} // namespace Renderdoc +#endif \ No newline at end of file diff --git a/src/renderer.cpp b/src/renderer.cpp index 76c3e7a0..6a18df85 100644 --- a/src/renderer.cpp +++ b/src/renderer.cpp @@ -17,6 +17,7 @@ std::optional Renderer::typeFromString(std::string inString) { {"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null}, {"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL}, {"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan}, + {"mtl", RendererType::Metal}, {"metal", RendererType::Metal}, {"sw", RendererType::Software}, {"soft", RendererType::Software}, {"software", RendererType::Software}, {"softrast", RendererType::Software}, }; @@ -33,7 +34,8 @@ const char* Renderer::typeToString(RendererType rendererType) { case RendererType::Null: return "null"; case RendererType::OpenGL: return "opengl"; case RendererType::Vulkan: return "vulkan"; + case RendererType::Metal: return "metal"; case RendererType::Software: return "software"; default: return "Invalid"; } -} \ No newline at end of file +} diff --git a/tests/PICA_LITP/Makefile b/tests/PICA_LITP/Makefile new file mode 100644 index 00000000..46a94048 --- /dev/null +++ b/tests/PICA_LITP/Makefile @@ -0,0 +1,255 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# GRAPHICS is a list of directories containing graphics files +# GFXBUILD is the directory where converted graphics files will be placed +# If set to $(BUILD), it will statically link in the converted +# files as if they were data files. +# +# NO_SMDH: if set to anything, no SMDH file is generated. +# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional) +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. +# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include +GRAPHICS := gfx +GFXBUILD := $(BUILD) +#ROMFS := romfs +#GFXBUILD := $(ROMFS)/gfx + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -ffunction-sections \ + $(ARCH) + +CFLAGS += $(INCLUDE) -D__3DS__ + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lcitro3d -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica))) +SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist))) +GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------- +ifeq ($(GFXBUILD),$(BUILD)) +#--------------------------------------------------------------------------------- +export T3XFILES := $(GFXFILES:.t3s=.t3x) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- +export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES)) +export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES)) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export OFILES_BIN := $(addsuffix .o,$(BINFILES)) \ + $(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \ + $(addsuffix .o,$(T3XFILES)) + +export OFILES := $(OFILES_BIN) $(OFILES_SOURCES) + +export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \ + $(addsuffix .h,$(subst .,_,$(BINFILES))) \ + $(GFXFILES:.t3s=.h) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +ifneq ($(ROMFS),) + export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS) +endif + +.PHONY: all clean + +#--------------------------------------------------------------------------------- +all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES) + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +$(BUILD): + @mkdir -p $@ + +ifneq ($(GFXBUILD),$(BUILD)) +$(GFXBUILD): + @mkdir -p $@ +endif + +ifneq ($(DEPSDIR),$(BUILD)) +$(DEPSDIR): + @mkdir -p $@ +endif + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... + @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD) + +#--------------------------------------------------------------------------------- +$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x + +#--------------------------------------------------------------------------------- +else + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS) + +$(OFILES_SOURCES) : $(HFILES) + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o %_bin.h : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +.PRECIOUS : %.t3x +#--------------------------------------------------------------------------------- +%.t3x.o %_t3x.h : %.t3x +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rules for assembling GPU shaders +#--------------------------------------------------------------------------------- +define shader-as + $(eval CURBIN := $*.shbin) + $(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d) + echo "$(CURBIN).o: $< $1" > $(DEPSFILE) + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + picasso -o $(CURBIN) $1 + bin2s $(CURBIN) | $(AS) -o $*.shbin.o +endef + +%.shbin.o %_shbin.h : %.v.pica %.g.pica + @echo $(notdir $^) + @$(call shader-as,$^) + +%.shbin.o %_shbin.h : %.v.pica + @echo $(notdir $<) + @$(call shader-as,$<) + +%.shbin.o %_shbin.h : %.shlist + @echo $(notdir $<) + @$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file))) + +#--------------------------------------------------------------------------------- +%.t3x %.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $*.h -d $*.d -o $*.t3x + +-include $(DEPSDIR)/*.d + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c new file mode 100644 index 00000000..9bcab5b9 --- /dev/null +++ b/tests/PICA_LITP/source/main.c @@ -0,0 +1,123 @@ +#include <3ds.h> +#include +#include + +#include "vshader_shbin.h" + + +#define CLEAR_COLOR 0x68B0D8FF + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | \ + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static C3D_Mtx projection; + +static void sceneInit(void) { + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); + + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); +} + +static void sceneRender(void) { + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + + // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmDrawEnd(); +} + +static void sceneExit(void) { + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() { + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + + // Initialize the scene + sceneInit(); + + // Main loop + while (aptMainLoop()) { + hidScanInput(); + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) break; // break in order to return to hbmenu + + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; +} \ No newline at end of file diff --git a/tests/PICA_LITP/source/vshader.v.pica b/tests/PICA_LITP/source/vshader.v.pica new file mode 100644 index 00000000..d745f939 --- /dev/null +++ b/tests/PICA_LITP/source/vshader.v.pica @@ -0,0 +1,73 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.1) +.constf myconst2(0.3, 0.0, 0.0, 0.0) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +.constf magenta(0.8, 0.192, 0.812, 1.0) +.constf cyan(0.137, 0.949, 0.906, 1.0) +.constf lime(0.286, 0.929, 0.412, 1.0) + +.constf normalize_y(1.0, 1.0/128.0, 1.0, 1.0) + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.bool test + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; Test litp via the output fragment colour + ; r1 = input colour + mov r1, inclr + + ; This should perform the following operation: + ; cmp = (x >= 0, w >= 0) + ; dest = ( max(x, 0), clamp(y, -128, +128 ), 0, max(w, 0) ); + litp r2, r1 + + ifc cmp.x + ifc cmp.y + ; cmp.x = 1, cmp.y = 1, write magenta + mov outclr, magenta + end + .else + ; cmp.x = 1, cmp.y = 0, write cyan + mov outclr, cyan + end + .end + .else + ifc cmp.y + ; cmp.x = 0, cmp.y + mov outclr, lime + end + .end + .end + + ; cmp.x 0, cmp.y = 0, write output of litp to out colour, with y normalized to [-1, 1] + mul r2.xyz, normalize_y, r2 + ; Set alpha to one + mov r2.a, ones.a + + mov outclr, r2 + end +.end \ No newline at end of file diff --git a/third_party/duckstation/gl/stream_buffer.cpp b/third_party/duckstation/gl/stream_buffer.cpp new file mode 100644 index 00000000..b7a40603 --- /dev/null +++ b/third_party/duckstation/gl/stream_buffer.cpp @@ -0,0 +1,288 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#include "gl/stream_buffer.h" + +#include +#include + +#include "align.hpp" + +OpenGLStreamBuffer::OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : m_target(target), m_buffer_id(buffer_id), m_size(size) {} +OpenGLStreamBuffer::~OpenGLStreamBuffer() { glDeleteBuffers(1, &m_buffer_id); } + +void OpenGLStreamBuffer::Bind() { glBindBuffer(m_target, m_buffer_id); } +void OpenGLStreamBuffer::Unbind() { glBindBuffer(m_target, 0); } + +void OpenGLStreamBuffer::SetDebugName(std::string_view name) { +#ifdef GPU_DEBUG_INFO + if (glObjectLabel) { + glObjectLabel(GL_BUFFER, GetGLBufferId(), static_cast(name.length()), static_cast(name.data())); + } +#endif +} + +namespace { + // Uses glBufferSubData() to update. Preferred for drivers which don't support {ARB,EXT}_buffer_storage. + class BufferSubDataStreamBuffer final : public OpenGLStreamBuffer { + public: + ~BufferSubDataStreamBuffer() override { Common::alignedFree(m_cpu_buffer); } + + MappingResult Map(u32 alignment, u32 min_size) override { return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } + + u32 Unmap(u32 used_size) override { + if (used_size == 0) return 0; + + glBindBuffer(m_target, m_buffer_id); + glBufferSubData(m_target, 0, used_size, m_cpu_buffer); + return 0; + } + + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { + glGetError(); + + GLuint buffer_id; + glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + glBufferData(target, size, nullptr, GL_STREAM_DRAW); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + return std::unique_ptr(new BufferSubDataStreamBuffer(target, buffer_id, size)); + } + + private: + BufferSubDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::alignedMalloc(size, 32)); + if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); + } + + u8* m_cpu_buffer; + }; + + // Uses BufferData() to orphan the buffer after every update. Used on Mali where BufferSubData forces a sync. + class BufferDataStreamBuffer final : public OpenGLStreamBuffer { + public: + ~BufferDataStreamBuffer() override { Common::alignedFree(m_cpu_buffer); } + + MappingResult Map(u32 alignment, u32 min_size) override { return MappingResult{static_cast(m_cpu_buffer), 0, 0, m_size / alignment}; } + + u32 Unmap(u32 used_size) override { + if (used_size == 0) return 0; + + glBindBuffer(m_target, m_buffer_id); + glBufferData(m_target, used_size, m_cpu_buffer, GL_STREAM_DRAW); + return 0; + } + + u32 GetChunkSize() const override { return m_size; } + + static std::unique_ptr Create(GLenum target, u32 size) { + glGetError(); + + GLuint buffer_id; + glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + glBufferData(target, size, nullptr, GL_STREAM_DRAW); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + return std::unique_ptr(new BufferDataStreamBuffer(target, buffer_id, size)); + } + + private: + BufferDataStreamBuffer(GLenum target, GLuint buffer_id, u32 size) : OpenGLStreamBuffer(target, buffer_id, size) { + m_cpu_buffer = static_cast(Common::alignedMalloc(size, 32)); + if (!m_cpu_buffer) Panic("Failed to allocate CPU storage for GL buffer"); + } + + u8* m_cpu_buffer; + }; + + // Base class for implementations which require syncing. + class SyncingStreamBuffer : public OpenGLStreamBuffer { + public: + enum : u32 { NUM_SYNC_POINTS = 16 }; + + virtual ~SyncingStreamBuffer() override { + for (u32 i = m_available_block_index; i <= m_used_block_index; i++) { + glDeleteSync(m_sync_objects[i]); + } + } + + protected: + SyncingStreamBuffer(GLenum target, GLuint buffer_id, u32 size) + : OpenGLStreamBuffer(target, buffer_id, size), m_bytes_per_block((size + (NUM_SYNC_POINTS)-1) / NUM_SYNC_POINTS) {} + + ALWAYS_INLINE u32 GetSyncIndexForOffset(u32 offset) { return offset / m_bytes_per_block; } + + ALWAYS_INLINE void AddSyncsForOffset(u32 offset) { + const u32 end = GetSyncIndexForOffset(offset); + for (; m_used_block_index < end; m_used_block_index++) { + if (m_sync_objects[m_used_block_index]) { + Helpers::warn("GL stream buffer: Fence slot we're trying to insert is already in use"); + } + + m_sync_objects[m_used_block_index] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + } + + ALWAYS_INLINE void WaitForSync(GLsync& sync) { + glClientWaitSync(sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + glDeleteSync(sync); + sync = nullptr; + } + + ALWAYS_INLINE void EnsureSyncsWaitedForOffset(u32 offset) { + const u32 end = std::min(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS); + for (; m_available_block_index < end; m_available_block_index++) { + if (!m_sync_objects[m_available_block_index]) [[unlikely]] { + Helpers::warn("GL stream buffer: Fence slot we're trying to wait on is not in use"); + } + + WaitForSync(m_sync_objects[m_available_block_index]); + } + } + + void AllocateSpace(u32 size) { + // add sync objects for writes since the last allocation + AddSyncsForOffset(m_position); + + // wait for sync objects for the space we want to use + EnsureSyncsWaitedForOffset(m_position + size); + + // wrap-around? + if ((m_position + size) > m_size) { + // current position ... buffer end + AddSyncsForOffset(m_size); + + // rewind, and try again + m_position = 0; + + // wait for the sync at the start of the buffer + WaitForSync(m_sync_objects[0]); + m_available_block_index = 1; + + // and however much more we need to satisfy the allocation + EnsureSyncsWaitedForOffset(size); + m_used_block_index = 0; + } + } + + u32 GetChunkSize() const override { return m_size / NUM_SYNC_POINTS; } + + u32 m_position = 0; + u32 m_used_block_index = 0; + u32 m_available_block_index = NUM_SYNC_POINTS; + u32 m_bytes_per_block; + std::array m_sync_objects{}; + }; + + class BufferStorageStreamBuffer : public SyncingStreamBuffer { + public: + ~BufferStorageStreamBuffer() override { + glBindBuffer(m_target, m_buffer_id); + glUnmapBuffer(m_target); + glBindBuffer(m_target, 0); + } + + MappingResult Map(u32 alignment, u32 min_size) override { + if (m_position > 0) m_position = Common::alignUp(m_position, alignment); + + AllocateSpace(min_size); + if ((m_position + min_size) > (m_available_block_index * m_bytes_per_block)) [[unlikely]] { + Helpers::panic("GL stream buffer: Invalid size passed to Unmap"); + } + + const u32 free_space_in_block = ((m_available_block_index * m_bytes_per_block) - m_position); + return MappingResult{static_cast(m_mapped_ptr + m_position), m_position, m_position / alignment, free_space_in_block / alignment}; + } + + u32 Unmap(u32 used_size) override { + if ((m_position + used_size) > m_size) [[unlikely]] { + Helpers::panic("GL stream buffer: Invalid size passed to Unmap"); + } + + if (!m_coherent) { + if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_direct_state_access) { + glFlushMappedNamedBufferRange(m_buffer_id, m_position, used_size); + } else { + Bind(); + glFlushMappedBufferRange(m_target, m_position, used_size); + } + } + + const u32 prev_position = m_position; + m_position += used_size; + return prev_position; + } + + static std::unique_ptr Create(GLenum target, u32 size, bool coherent = true) { + glGetError(); + + GLuint buffer_id; + glGenBuffers(1, &buffer_id); + glBindBuffer(target, buffer_id); + + const u32 flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + const u32 map_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT); + if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage) + glBufferStorage(target, size, nullptr, flags); + else if (GLAD_GL_EXT_buffer_storage) + glBufferStorageEXT(target, size, nullptr, flags); + + GLenum err = glGetError(); + if (err != GL_NO_ERROR) { + glBindBuffer(target, 0); + glDeleteBuffers(1, &buffer_id); + return {}; + } + + u8* mapped_ptr = static_cast(glMapBufferRange(target, 0, size, map_flags)); + AssertMsg(mapped_ptr, "Persistent buffer was mapped"); + + return std::unique_ptr(new BufferStorageStreamBuffer(target, buffer_id, size, mapped_ptr, coherent)); + } + + private: + BufferStorageStreamBuffer(GLenum target, GLuint buffer_id, u32 size, u8* mapped_ptr, bool coherent) + : SyncingStreamBuffer(target, buffer_id, size), m_mapped_ptr(mapped_ptr), m_coherent(coherent) {} + + u8* m_mapped_ptr; + bool m_coherent; + }; + +} // namespace + +std::unique_ptr OpenGLStreamBuffer::Create(GLenum target, u32 size) { + std::unique_ptr buf; + if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) { + buf = BufferStorageStreamBuffer::Create(target, size); + if (buf) return buf; + } + + // BufferSubData is slower on all drivers except NVIDIA... +#if 0 + const char* vendor = reinterpret_cast(glGetString(GL_VENDOR)); + if (std::strcmp(vendor, "ARM") == 0 || std::strcmp(vendor, "Qualcomm") == 0) { + // Mali and Adreno drivers can't do sub-buffer tracking... + return BufferDataStreamBuffer::Create(target, size); + } + + return BufferSubDataStreamBuffer::Create(target, size); +#else + return BufferDataStreamBuffer::Create(target, size); +#endif +} \ No newline at end of file diff --git a/third_party/duckstation/gl/stream_buffer.h b/third_party/duckstation/gl/stream_buffer.h new file mode 100644 index 00000000..6b3562e7 --- /dev/null +++ b/third_party/duckstation/gl/stream_buffer.h @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: 2019-2023 Connor McLaughlin +// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) + +#pragma once + +#include +// Comment to avoid clang-format reordering the glad header + +#include +#include +#include +#include + +#include "duckstation_compat.h" +#include "helpers.hpp" + +class OpenGLStreamBuffer { + public: + virtual ~OpenGLStreamBuffer(); + + ALWAYS_INLINE GLuint GetGLBufferId() const { return m_buffer_id; } + ALWAYS_INLINE GLenum GetGLTarget() const { return m_target; } + ALWAYS_INLINE u32 GetSize() const { return m_size; } + + void Bind(); + void Unbind(); + + void SetDebugName(std::string_view name); + + struct MappingResult { + void* pointer; + u32 buffer_offset; + u32 index_aligned; // offset / alignment, suitable for base vertex + u32 space_aligned; // remaining space / alignment + }; + + virtual MappingResult Map(u32 alignment, u32 min_size) = 0; + + /// Returns the position in the buffer *before* the start of used_size. + virtual u32 Unmap(u32 used_size) = 0; + + /// Returns the minimum granularity of blocks which sync objects will be created around. + virtual u32 GetChunkSize() const = 0; + + static std::unique_ptr Create(GLenum target, u32 size); + + protected: + OpenGLStreamBuffer(GLenum target, GLuint buffer_id, u32 size); + + GLenum m_target; + GLuint m_buffer_id; + u32 m_size; +}; \ No newline at end of file diff --git a/third_party/fdk-aac b/third_party/fdk-aac new file mode 160000 index 00000000..5559136b --- /dev/null +++ b/third_party/fdk-aac @@ -0,0 +1 @@ +Subproject commit 5559136bb53ce38f6f07dac4f47674dd4f032d03 diff --git a/third_party/fmt b/third_party/fmt new file mode 160000 index 00000000..f8581bce --- /dev/null +++ b/third_party/fmt @@ -0,0 +1 @@ +Subproject commit f8581bcecf317e8753887b68187c9ef1ba0524f4 diff --git a/third_party/hips b/third_party/hips new file mode 160000 index 00000000..bbe8faf1 --- /dev/null +++ b/third_party/hips @@ -0,0 +1 @@ +Subproject commit bbe8faf149c4e10aaa45e2454fdb386e4cabf0cb diff --git a/third_party/libretro/include/libretro.h b/third_party/libretro/include/libretro.h new file mode 100644 index 00000000..96d07df4 --- /dev/null +++ b/third_party/libretro/include/libretro.h @@ -0,0 +1,4405 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this libretro API header (libretro.h). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef LIBRETRO_H__ +#define LIBRETRO_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __cplusplus +#if defined(_MSC_VER) && _MSC_VER < 1800 && !defined(SN_TARGET_PS3) +/* Hack applied for MSVC when compiling in C89 mode + * as it isn't C99-compliant. */ +#define bool unsigned char +#define true 1 +#define false 0 +#else +#include +#endif +#endif + +#ifndef RETRO_CALLCONV +# if defined(__GNUC__) && defined(__i386__) && !defined(__x86_64__) +# define RETRO_CALLCONV __attribute__((cdecl)) +# elif defined(_MSC_VER) && defined(_M_X86) && !defined(_M_X64) +# define RETRO_CALLCONV __cdecl +# else +# define RETRO_CALLCONV /* all other platforms only have one calling convention each */ +# endif +#endif + +#ifndef RETRO_API +# if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) +# ifdef RETRO_IMPORT_SYMBOLS +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllimport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllimport) +# endif +# else +# ifdef __GNUC__ +# define RETRO_API RETRO_CALLCONV __attribute__((__dllexport__)) +# else +# define RETRO_API RETRO_CALLCONV __declspec(dllexport) +# endif +# endif +# else +# if defined(__GNUC__) && __GNUC__ >= 4 +# define RETRO_API RETRO_CALLCONV __attribute__((__visibility__("default"))) +# else +# define RETRO_API RETRO_CALLCONV +# endif +# endif +#endif + +/* Used for checking API/ABI mismatches that can break libretro + * implementations. + * It is not incremented for compatible changes to the API. + */ +#define RETRO_API_VERSION 1 + +/* + * Libretro's fundamental device abstractions. + * + * Libretro's input system consists of some standardized device types, + * such as a joypad (with/without analog), mouse, keyboard, lightgun + * and a pointer. + * + * The functionality of these devices are fixed, and individual cores + * map their own concept of a controller to libretro's abstractions. + * This makes it possible for frontends to map the abstract types to a + * real input device, and not having to worry about binding input + * correctly to arbitrary controller layouts. + */ + +#define RETRO_DEVICE_TYPE_SHIFT 8 +#define RETRO_DEVICE_MASK ((1 << RETRO_DEVICE_TYPE_SHIFT) - 1) +#define RETRO_DEVICE_SUBCLASS(base, id) (((id + 1) << RETRO_DEVICE_TYPE_SHIFT) | base) + +/* Input disabled. */ +#define RETRO_DEVICE_NONE 0 + +/* The JOYPAD is called RetroPad. It is essentially a Super Nintendo + * controller, but with additional L2/R2/L3/R3 buttons, similar to a + * PS1 DualShock. */ +#define RETRO_DEVICE_JOYPAD 1 + +/* The mouse is a simple mouse, similar to Super Nintendo's mouse. + * X and Y coordinates are reported relatively to last poll (poll callback). + * It is up to the libretro implementation to keep track of where the mouse + * pointer is supposed to be on the screen. + * The frontend must make sure not to interfere with its own hardware + * mouse pointer. + */ +#define RETRO_DEVICE_MOUSE 2 + +/* KEYBOARD device lets one poll for raw key pressed. + * It is poll based, so input callback will return with the current + * pressed state. + * For event/text based keyboard input, see + * RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + */ +#define RETRO_DEVICE_KEYBOARD 3 + +/* LIGHTGUN device is similar to Guncon-2 for PlayStation 2. + * It reports X/Y coordinates in screen space (similar to the pointer) + * in the range [-0x8000, 0x7fff] in both axes, with zero being center and + * -0x8000 being out of bounds. + * As well as reporting on/off screen state. It features a trigger, + * start/select buttons, auxiliary action buttons and a + * directional pad. A forced off-screen shot can be requested for + * auto-reloading function in some games. + */ +#define RETRO_DEVICE_LIGHTGUN 4 + +/* The ANALOG device is an extension to JOYPAD (RetroPad). + * Similar to DualShock2 it adds two analog sticks and all buttons can + * be analog. This is treated as a separate device type as it returns + * axis values in the full analog range of [-0x7fff, 0x7fff], + * although some devices may return -0x8000. + * Positive X axis is right. Positive Y axis is down. + * Buttons are returned in the range [0, 0x7fff]. + * Only use ANALOG type when polling for analog values. + */ +#define RETRO_DEVICE_ANALOG 5 + +/* Abstracts the concept of a pointing mechanism, e.g. touch. + * This allows libretro to query in absolute coordinates where on the + * screen a mouse (or something similar) is being placed. + * For a touch centric device, coordinates reported are the coordinates + * of the press. + * + * Coordinates in X and Y are reported as: + * [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, + * and 0x7fff corresponds to the far right/bottom of the screen. + * The "screen" is here defined as area that is passed to the frontend and + * later displayed on the monitor. + * + * The frontend is free to scale/resize this screen as it sees fit, however, + * (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the + * game image, etc. + * + * To check if the pointer coordinates are valid (e.g. a touch display + * actually being touched), PRESSED returns 1 or 0. + * + * If using a mouse on a desktop, PRESSED will usually correspond to the + * left mouse button, but this is a frontend decision. + * PRESSED will only return 1 if the pointer is inside the game screen. + * + * For multi-touch, the index variable can be used to successively query + * more presses. + * If index = 0 returns true for _PRESSED, coordinates can be extracted + * with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with + * index = 1, and so on. + * Eventually _PRESSED will return false for an index. No further presses + * are registered at this point. */ +#define RETRO_DEVICE_POINTER 6 + +/* Buttons for the RetroPad (JOYPAD). + * The placement of these is equivalent to placements on the + * Super Nintendo controller. + * L2/R2/L3/R3 buttons correspond to the PS1 DualShock. + * Also used as id values for RETRO_DEVICE_INDEX_ANALOG_BUTTON */ +#define RETRO_DEVICE_ID_JOYPAD_B 0 +#define RETRO_DEVICE_ID_JOYPAD_Y 1 +#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 +#define RETRO_DEVICE_ID_JOYPAD_START 3 +#define RETRO_DEVICE_ID_JOYPAD_UP 4 +#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 +#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 +#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 +#define RETRO_DEVICE_ID_JOYPAD_A 8 +#define RETRO_DEVICE_ID_JOYPAD_X 9 +#define RETRO_DEVICE_ID_JOYPAD_L 10 +#define RETRO_DEVICE_ID_JOYPAD_R 11 +#define RETRO_DEVICE_ID_JOYPAD_L2 12 +#define RETRO_DEVICE_ID_JOYPAD_R2 13 +#define RETRO_DEVICE_ID_JOYPAD_L3 14 +#define RETRO_DEVICE_ID_JOYPAD_R3 15 + +#define RETRO_DEVICE_ID_JOYPAD_MASK 256 + +/* Index / Id values for ANALOG device. */ +#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 +#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 +#define RETRO_DEVICE_INDEX_ANALOG_BUTTON 2 +#define RETRO_DEVICE_ID_ANALOG_X 0 +#define RETRO_DEVICE_ID_ANALOG_Y 1 + +/* Id values for MOUSE. */ +#define RETRO_DEVICE_ID_MOUSE_X 0 +#define RETRO_DEVICE_ID_MOUSE_Y 1 +#define RETRO_DEVICE_ID_MOUSE_LEFT 2 +#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 +#define RETRO_DEVICE_ID_MOUSE_WHEELUP 4 +#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN 5 +#define RETRO_DEVICE_ID_MOUSE_MIDDLE 6 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELUP 7 +#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELDOWN 8 +#define RETRO_DEVICE_ID_MOUSE_BUTTON_4 9 +#define RETRO_DEVICE_ID_MOUSE_BUTTON_5 10 + +/* Id values for LIGHTGUN. */ +#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X 13 /*Absolute Position*/ +#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y 14 /*Absolute*/ +#define RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN 15 /*Status Check*/ +#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 +#define RETRO_DEVICE_ID_LIGHTGUN_RELOAD 16 /*Forced off-screen shot*/ +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_A 3 +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_B 4 +#define RETRO_DEVICE_ID_LIGHTGUN_START 6 +#define RETRO_DEVICE_ID_LIGHTGUN_SELECT 7 +#define RETRO_DEVICE_ID_LIGHTGUN_AUX_C 8 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_UP 9 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_DOWN 10 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_LEFT 11 +#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_RIGHT 12 +/* deprecated */ +#define RETRO_DEVICE_ID_LIGHTGUN_X 0 /*Relative Position*/ +#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 /*Relative*/ +#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 /*Use Aux:A*/ +#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 /*Use Aux:B*/ +#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 /*Use Start*/ + +/* Id values for POINTER. */ +#define RETRO_DEVICE_ID_POINTER_X 0 +#define RETRO_DEVICE_ID_POINTER_Y 1 +#define RETRO_DEVICE_ID_POINTER_PRESSED 2 +#define RETRO_DEVICE_ID_POINTER_COUNT 3 + +/* Returned from retro_get_region(). */ +#define RETRO_REGION_NTSC 0 +#define RETRO_REGION_PAL 1 + +/* Id values for LANGUAGE */ +enum retro_language +{ + RETRO_LANGUAGE_ENGLISH = 0, + RETRO_LANGUAGE_JAPANESE = 1, + RETRO_LANGUAGE_FRENCH = 2, + RETRO_LANGUAGE_SPANISH = 3, + RETRO_LANGUAGE_GERMAN = 4, + RETRO_LANGUAGE_ITALIAN = 5, + RETRO_LANGUAGE_DUTCH = 6, + RETRO_LANGUAGE_PORTUGUESE_BRAZIL = 7, + RETRO_LANGUAGE_PORTUGUESE_PORTUGAL = 8, + RETRO_LANGUAGE_RUSSIAN = 9, + RETRO_LANGUAGE_KOREAN = 10, + RETRO_LANGUAGE_CHINESE_TRADITIONAL = 11, + RETRO_LANGUAGE_CHINESE_SIMPLIFIED = 12, + RETRO_LANGUAGE_ESPERANTO = 13, + RETRO_LANGUAGE_POLISH = 14, + RETRO_LANGUAGE_VIETNAMESE = 15, + RETRO_LANGUAGE_ARABIC = 16, + RETRO_LANGUAGE_GREEK = 17, + RETRO_LANGUAGE_TURKISH = 18, + RETRO_LANGUAGE_SLOVAK = 19, + RETRO_LANGUAGE_PERSIAN = 20, + RETRO_LANGUAGE_HEBREW = 21, + RETRO_LANGUAGE_ASTURIAN = 22, + RETRO_LANGUAGE_FINNISH = 23, + RETRO_LANGUAGE_INDONESIAN = 24, + RETRO_LANGUAGE_SWEDISH = 25, + RETRO_LANGUAGE_UKRAINIAN = 26, + RETRO_LANGUAGE_CZECH = 27, + RETRO_LANGUAGE_CATALAN_VALENCIA = 28, + RETRO_LANGUAGE_CATALAN = 29, + RETRO_LANGUAGE_BRITISH_ENGLISH = 30, + RETRO_LANGUAGE_HUNGARIAN = 31, + RETRO_LANGUAGE_BELARUSIAN = 32, + RETRO_LANGUAGE_LAST, + + /* Ensure sizeof(enum) == sizeof(int) */ + RETRO_LANGUAGE_DUMMY = INT_MAX +}; + +/* Passed to retro_get_memory_data/size(). + * If the memory type doesn't apply to the + * implementation NULL/0 can be returned. + */ +#define RETRO_MEMORY_MASK 0xff + +/* Regular save RAM. This RAM is usually found on a game cartridge, + * backed up by a battery. + * If save game data is too complex for a single memory buffer, + * the SAVE_DIRECTORY (preferably) or SYSTEM_DIRECTORY environment + * callback can be used. */ +#define RETRO_MEMORY_SAVE_RAM 0 + +/* Some games have a built-in clock to keep track of time. + * This memory is usually just a couple of bytes to keep track of time. + */ +#define RETRO_MEMORY_RTC 1 + +/* System ram lets a frontend peek into a game systems main RAM. */ +#define RETRO_MEMORY_SYSTEM_RAM 2 + +/* Video ram lets a frontend peek into a game systems video RAM (VRAM). */ +#define RETRO_MEMORY_VIDEO_RAM 3 + +/* Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. */ +enum retro_key +{ + RETROK_UNKNOWN = 0, + RETROK_FIRST = 0, + RETROK_BACKSPACE = 8, + RETROK_TAB = 9, + RETROK_CLEAR = 12, + RETROK_RETURN = 13, + RETROK_PAUSE = 19, + RETROK_ESCAPE = 27, + RETROK_SPACE = 32, + RETROK_EXCLAIM = 33, + RETROK_QUOTEDBL = 34, + RETROK_HASH = 35, + RETROK_DOLLAR = 36, + RETROK_AMPERSAND = 38, + RETROK_QUOTE = 39, + RETROK_LEFTPAREN = 40, + RETROK_RIGHTPAREN = 41, + RETROK_ASTERISK = 42, + RETROK_PLUS = 43, + RETROK_COMMA = 44, + RETROK_MINUS = 45, + RETROK_PERIOD = 46, + RETROK_SLASH = 47, + RETROK_0 = 48, + RETROK_1 = 49, + RETROK_2 = 50, + RETROK_3 = 51, + RETROK_4 = 52, + RETROK_5 = 53, + RETROK_6 = 54, + RETROK_7 = 55, + RETROK_8 = 56, + RETROK_9 = 57, + RETROK_COLON = 58, + RETROK_SEMICOLON = 59, + RETROK_LESS = 60, + RETROK_EQUALS = 61, + RETROK_GREATER = 62, + RETROK_QUESTION = 63, + RETROK_AT = 64, + RETROK_LEFTBRACKET = 91, + RETROK_BACKSLASH = 92, + RETROK_RIGHTBRACKET = 93, + RETROK_CARET = 94, + RETROK_UNDERSCORE = 95, + RETROK_BACKQUOTE = 96, + RETROK_a = 97, + RETROK_b = 98, + RETROK_c = 99, + RETROK_d = 100, + RETROK_e = 101, + RETROK_f = 102, + RETROK_g = 103, + RETROK_h = 104, + RETROK_i = 105, + RETROK_j = 106, + RETROK_k = 107, + RETROK_l = 108, + RETROK_m = 109, + RETROK_n = 110, + RETROK_o = 111, + RETROK_p = 112, + RETROK_q = 113, + RETROK_r = 114, + RETROK_s = 115, + RETROK_t = 116, + RETROK_u = 117, + RETROK_v = 118, + RETROK_w = 119, + RETROK_x = 120, + RETROK_y = 121, + RETROK_z = 122, + RETROK_LEFTBRACE = 123, + RETROK_BAR = 124, + RETROK_RIGHTBRACE = 125, + RETROK_TILDE = 126, + RETROK_DELETE = 127, + + RETROK_KP0 = 256, + RETROK_KP1 = 257, + RETROK_KP2 = 258, + RETROK_KP3 = 259, + RETROK_KP4 = 260, + RETROK_KP5 = 261, + RETROK_KP6 = 262, + RETROK_KP7 = 263, + RETROK_KP8 = 264, + RETROK_KP9 = 265, + RETROK_KP_PERIOD = 266, + RETROK_KP_DIVIDE = 267, + RETROK_KP_MULTIPLY = 268, + RETROK_KP_MINUS = 269, + RETROK_KP_PLUS = 270, + RETROK_KP_ENTER = 271, + RETROK_KP_EQUALS = 272, + + RETROK_UP = 273, + RETROK_DOWN = 274, + RETROK_RIGHT = 275, + RETROK_LEFT = 276, + RETROK_INSERT = 277, + RETROK_HOME = 278, + RETROK_END = 279, + RETROK_PAGEUP = 280, + RETROK_PAGEDOWN = 281, + + RETROK_F1 = 282, + RETROK_F2 = 283, + RETROK_F3 = 284, + RETROK_F4 = 285, + RETROK_F5 = 286, + RETROK_F6 = 287, + RETROK_F7 = 288, + RETROK_F8 = 289, + RETROK_F9 = 290, + RETROK_F10 = 291, + RETROK_F11 = 292, + RETROK_F12 = 293, + RETROK_F13 = 294, + RETROK_F14 = 295, + RETROK_F15 = 296, + + RETROK_NUMLOCK = 300, + RETROK_CAPSLOCK = 301, + RETROK_SCROLLOCK = 302, + RETROK_RSHIFT = 303, + RETROK_LSHIFT = 304, + RETROK_RCTRL = 305, + RETROK_LCTRL = 306, + RETROK_RALT = 307, + RETROK_LALT = 308, + RETROK_RMETA = 309, + RETROK_LMETA = 310, + RETROK_LSUPER = 311, + RETROK_RSUPER = 312, + RETROK_MODE = 313, + RETROK_COMPOSE = 314, + + RETROK_HELP = 315, + RETROK_PRINT = 316, + RETROK_SYSREQ = 317, + RETROK_BREAK = 318, + RETROK_MENU = 319, + RETROK_POWER = 320, + RETROK_EURO = 321, + RETROK_UNDO = 322, + RETROK_OEM_102 = 323, + + RETROK_LAST, + + RETROK_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +enum retro_mod +{ + RETROKMOD_NONE = 0x0000, + + RETROKMOD_SHIFT = 0x01, + RETROKMOD_CTRL = 0x02, + RETROKMOD_ALT = 0x04, + RETROKMOD_META = 0x08, + + RETROKMOD_NUMLOCK = 0x10, + RETROKMOD_CAPSLOCK = 0x20, + RETROKMOD_SCROLLOCK = 0x40, + + RETROKMOD_DUMMY = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */ +}; + +/* If set, this call is not part of the public libretro API yet. It can + * change or be removed at any time. */ +#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 +/* Environment callback to be used internally in frontend. */ +#define RETRO_ENVIRONMENT_PRIVATE 0x20000 + +/* Environment commands. */ +#define RETRO_ENVIRONMENT_SET_ROTATION 1 /* const unsigned * -- + * Sets screen rotation of graphics. + * Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, + * 270 degrees counter-clockwise respectively. + */ +#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 /* bool * -- + * NOTE: As of 2019 this callback is considered deprecated in favor of + * using core options to manage overscan in a more nuanced, core-specific way. + * + * Boolean value whether or not the implementation should use overscan, + * or crop away overscan. + */ +#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 /* bool * -- + * Boolean value whether or not frontend supports frame duping, + * passing NULL to video frame callback. + */ + + /* Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), + * and reserved to avoid possible ABI clash. + */ + +#define RETRO_ENVIRONMENT_SET_MESSAGE 6 /* const struct retro_message * -- + * Sets a message to be displayed in implementation-specific manner + * for a certain amount of 'frames'. + * Should not be used for trivial messages, which should simply be + * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a + * fallback, stderr). + */ +#define RETRO_ENVIRONMENT_SHUTDOWN 7 /* N/A (NULL) -- + * Requests the frontend to shutdown. + * Should only be used if game has a specific + * way to shutdown the game from a menu item or similar. + */ +#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 + /* const unsigned * -- + * Gives a hint to the frontend how demanding this implementation + * is on a system. E.g. reporting a level of 2 means + * this implementation should run decently on all frontends + * of level 2 and up. + * + * It can be used by the frontend to potentially warn + * about too demanding implementations. + * + * The levels are "floating". + * + * This function can be called on a per-game basis, + * as certain games an implementation can play might be + * particularly demanding. + * If called, it should be called in retro_load_game(). + */ +#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 + /* const char ** -- + * Returns the "system" directory of the frontend. + * This directory can be used to store system specific + * content such as BIOSes, configuration data, etc. + * The returned value can be NULL. + * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + * + * NOTE: Some cores used this folder also for "save" data such as + * memory cards, etc, for lack of a better place to put it. + * This is now discouraged, and if possible, cores should try to + * use the new GET_SAVE_DIRECTORY. + */ +#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 + /* const enum retro_pixel_format * -- + * Sets the internal pixel format used by the implementation. + * The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. + * This pixel format however, is deprecated (see enum retro_pixel_format). + * If the call returns false, the frontend does not support this pixel + * format. + * + * This function should be called inside retro_load_game() or + * retro_get_system_av_info(). + */ +#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 + /* const struct retro_input_descriptor * -- + * Sets an array of retro_input_descriptors. + * It is up to the frontend to present this in a usable way. + * The array is terminated by retro_input_descriptor::description + * being set to NULL. + * This function can be called at any time, but it is recommended + * to call it as early as possible. + */ +#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 + /* const struct retro_keyboard_callback * -- + * Sets a callback function used to notify core about keyboard events. + */ +#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 + /* const struct retro_disk_control_callback * -- + * Sets an interface which frontend can use to eject and insert + * disk images. + * This is used for games which consist of multiple images and + * must be manually swapped out by the user (e.g. PSX). + */ +#define RETRO_ENVIRONMENT_SET_HW_RENDER 14 + /* struct retro_hw_render_callback * -- + * Sets an interface to let a libretro core render with + * hardware acceleration. + * Should be called in retro_load_game(). + * If successful, libretro cores will be able to render to a + * frontend-provided framebuffer. + * The size of this framebuffer will be at least as large as + * max_width/max_height provided in get_av_info(). + * If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or + * NULL to retro_video_refresh_t. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE 15 + /* struct retro_variable * -- + * Interface to acquire user-defined information from environment + * that cannot feasibly be supported in a multi-system way. + * 'key' should be set to a key which has already been set by + * SET_VARIABLES. + * 'data' will be set to a value or NULL. + */ +#define RETRO_ENVIRONMENT_SET_VARIABLES 16 + /* const struct retro_variable * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterward it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * 'data' points to an array of retro_variable structs + * terminated by a { NULL, NULL } element. + * retro_variable::key should be namespaced to not collide + * with other implementations' keys. E.g. A core called + * 'foo' should use keys named as 'foo_option'. + * retro_variable::value should contain a human readable + * description of the key as well as a '|' delimited list + * of expected values. + * + * The number of possible options should be very limited, + * i.e. it should be feasible to cycle through options + * without a keyboard. + * + * First entry should be treated as a default. + * + * Example entry: + * { "foo_option", "Speed hack coprocessor X; false|true" } + * + * Text before first ';' is description. This ';' must be + * followed by a space, and followed by a list of possible + * values split up with '|'. + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ +#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 + /* bool * -- + * Result is set to true if some variables are updated by + * frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. + * Variables should be queried with GET_VARIABLE. + */ +#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 + /* const bool * -- + * If true, the libretro implementation supports calls to + * retro_load_game() with NULL as argument. + * Used by cores which can run without particular game data. + * This should be called within retro_set_environment() only. + */ +#define RETRO_ENVIRONMENT_GET_LIBRETRO_PATH 19 + /* const char ** -- + * Retrieves the absolute path from where this libretro + * implementation was loaded. + * NULL is returned if the libretro was loaded statically + * (i.e. linked statically to frontend), or if the path cannot be + * determined. + * Mostly useful in cooperation with SET_SUPPORT_NO_GAME as assets can + * be loaded without ugly hacks. + */ + + /* Environment 20 was an obsolete version of SET_AUDIO_CALLBACK. + * It was not used by any known core at the time, + * and was removed from the API. */ +#define RETRO_ENVIRONMENT_SET_FRAME_TIME_CALLBACK 21 + /* const struct retro_frame_time_callback * -- + * Lets the core know how much time has passed since last + * invocation of retro_run(). + * The frontend can tamper with the timing to fake fast-forward, + * slow-motion, frame stepping, etc. + * In this case the delta time will use the reference value + * in frame_time_callback.. + */ +#define RETRO_ENVIRONMENT_SET_AUDIO_CALLBACK 22 + /* const struct retro_audio_callback * -- + * Sets an interface which is used to notify a libretro core about audio + * being available for writing. + * The callback can be called from any thread, so a core using this must + * have a thread safe audio implementation. + * It is intended for games where audio and video are completely + * asynchronous and audio can be generated on the fly. + * This interface is not recommended for use with emulators which have + * highly synchronous audio. + * + * The callback only notifies about writability; the libretro core still + * has to call the normal audio callbacks + * to write audio. The audio callbacks must be called from within the + * notification callback. + * The amount of audio data to write is up to the implementation. + * Generally, the audio callback will be called continously in a loop. + * + * Due to thread safety guarantees and lack of sync between audio and + * video, a frontend can selectively disallow this interface based on + * internal configuration. A core using this interface must also + * implement the "normal" audio interface. + * + * A libretro core using SET_AUDIO_CALLBACK should also make use of + * SET_FRAME_TIME_CALLBACK. + */ +#define RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE 23 + /* struct retro_rumble_interface * -- + * Gets an interface which is used by a libretro core to set + * state of rumble motors in controllers. + * A strong and weak motor is supported, and they can be + * controlled indepedently. + * Should be called from either retro_init() or retro_load_game(). + * Should not be called from retro_set_environment(). + * Returns false if rumble functionality is unavailable. + */ +#define RETRO_ENVIRONMENT_GET_INPUT_DEVICE_CAPABILITIES 24 + /* uint64_t * -- + * Gets a bitmask telling which device type are expected to be + * handled properly in a call to retro_input_state_t. + * Devices which are not handled or recognized always return + * 0 in retro_input_state_t. + * Example bitmask: caps = (1 << RETRO_DEVICE_JOYPAD) | (1 << RETRO_DEVICE_ANALOG). + * Should only be called in retro_run(). + */ +#define RETRO_ENVIRONMENT_GET_SENSOR_INTERFACE (25 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_sensor_interface * -- + * Gets access to the sensor interface. + * The purpose of this interface is to allow + * setting state related to sensors such as polling rate, + * enabling/disable it entirely, etc. + * Reading sensor state is done via the normal + * input_state_callback API. + */ +#define RETRO_ENVIRONMENT_GET_CAMERA_INTERFACE (26 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_camera_callback * -- + * Gets an interface to a video camera driver. + * A libretro core can use this interface to get access to a + * video camera. + * New video frames are delivered in a callback in same + * thread as retro_run(). + * + * GET_CAMERA_INTERFACE should be called in retro_load_game(). + * + * Depending on the camera implementation used, camera frames + * will be delivered as a raw framebuffer, + * or as an OpenGL texture directly. + * + * The core has to tell the frontend here which types of + * buffers can be handled properly. + * An OpenGL texture can only be handled when using a + * libretro GL core (SET_HW_RENDER). + * It is recommended to use a libretro GL core when + * using camera interface. + * + * The camera is not started automatically. The retrieved start/stop + * functions must be used to explicitly + * start and stop the camera driver. + */ +#define RETRO_ENVIRONMENT_GET_LOG_INTERFACE 27 + /* struct retro_log_callback * -- + * Gets an interface for logging. This is useful for + * logging in a cross-platform way + * as certain platforms cannot use stderr for logging. + * It also allows the frontend to + * show logging information in a more suitable way. + * If this interface is not used, libretro cores should + * log to stderr as desired. + */ +#define RETRO_ENVIRONMENT_GET_PERF_INTERFACE 28 + /* struct retro_perf_callback * -- + * Gets an interface for performance counters. This is useful + * for performance logging in a cross-platform way and for detecting + * architecture-specific features, such as SIMD support. + */ +#define RETRO_ENVIRONMENT_GET_LOCATION_INTERFACE 29 + /* struct retro_location_callback * -- + * Gets access to the location interface. + * The purpose of this interface is to be able to retrieve + * location-based information from the host device, + * such as current latitude / longitude. + */ +#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 /* Old name, kept for compatibility. */ +#define RETRO_ENVIRONMENT_GET_CORE_ASSETS_DIRECTORY 30 + /* const char ** -- + * Returns the "core assets" directory of the frontend. + * This directory can be used to store specific assets that the + * core relies upon, such as art assets, + * input data, etc etc. + * The returned value can be NULL. + * If so, no such directory is defined, + * and it's up to the implementation to find a suitable directory. + */ +#define RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY 31 + /* const char ** -- + * Returns the "save" directory of the frontend, unless there is no + * save directory available. The save directory should be used to + * store SRAM, memory cards, high scores, etc, if the libretro core + * cannot use the regular memory interface (retro_get_memory_data()). + * + * If the frontend cannot designate a save directory, it will return + * NULL to indicate that the core should attempt to operate without a + * save directory set. + * + * NOTE: early libretro cores used the system directory for save + * files. Cores that need to be backwards-compatible can still check + * GET_SYSTEM_DIRECTORY. + */ +#define RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO 32 + /* const struct retro_system_av_info * -- + * Sets a new av_info structure. This can only be called from + * within retro_run(). + * This should *only* be used if the core is completely altering the + * internal resolutions, aspect ratios, timings, sampling rate, etc. + * Calling this can require a full reinitialization of video/audio + * drivers in the frontend, + * + * so it is important to call it very sparingly, and usually only with + * the users explicit consent. + * An eventual driver reinitialize will happen so that video and + * audio callbacks + * happening after this call within the same retro_run() call will + * target the newly initialized driver. + * + * This callback makes it possible to support configurable resolutions + * in games, which can be useful to + * avoid setting the "worst case" in max_width/max_height. + * + * ***HIGHLY RECOMMENDED*** Do not call this callback every time + * resolution changes in an emulator core if it's + * expected to be a temporary change, for the reasons of possible + * driver reinitialization. + * This call is not a free pass for not trying to provide + * correct values in retro_get_system_av_info(). If you need to change + * things like aspect ratio or nominal width/height, + * use RETRO_ENVIRONMENT_SET_GEOMETRY, which is a softer variant + * of SET_SYSTEM_AV_INFO. + * + * If this returns false, the frontend does not acknowledge a + * changed av_info struct. + */ +#define RETRO_ENVIRONMENT_SET_PROC_ADDRESS_CALLBACK 33 + /* const struct retro_get_proc_address_interface * -- + * Allows a libretro core to announce support for the + * get_proc_address() interface. + * This interface allows for a standard way to extend libretro where + * use of environment calls are too indirect, + * e.g. for cases where the frontend wants to call directly into the core. + * + * If a core wants to expose this interface, SET_PROC_ADDRESS_CALLBACK + * **MUST** be called from within retro_set_environment(). + */ +#define RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO 34 + /* const struct retro_subsystem_info * -- + * This environment call introduces the concept of libretro "subsystems". + * A subsystem is a variant of a libretro core which supports + * different kinds of games. + * The purpose of this is to support e.g. emulators which might + * have special needs, e.g. Super Nintendo's Super GameBoy, Sufami Turbo. + * It can also be used to pick among subsystems in an explicit way + * if the libretro implementation is a multi-system emulator itself. + * + * Loading a game via a subsystem is done with retro_load_game_special(), + * and this environment call allows a libretro core to expose which + * subsystems are supported for use with retro_load_game_special(). + * A core passes an array of retro_game_special_info which is terminated + * with a zeroed out retro_game_special_info struct. + * + * If a core wants to use this functionality, SET_SUBSYSTEM_INFO + * **MUST** be called from within retro_set_environment(). + */ +#define RETRO_ENVIRONMENT_SET_CONTROLLER_INFO 35 + /* const struct retro_controller_info * -- + * This environment call lets a libretro core tell the frontend + * which controller subclasses are recognized in calls to + * retro_set_controller_port_device(). + * + * Some emulators such as Super Nintendo support multiple lightgun + * types which must be specifically selected from. It is therefore + * sometimes necessary for a frontend to be able to tell the core + * about a special kind of input device which is not specifcally + * provided by the Libretro API. + * + * In order for a frontend to understand the workings of those devices, + * they must be defined as a specialized subclass of the generic device + * types already defined in the libretro API. + * + * The core must pass an array of const struct retro_controller_info which + * is terminated with a blanked out struct. Each element of the + * retro_controller_info struct corresponds to the ascending port index + * that is passed to retro_set_controller_port_device() when that function + * is called to indicate to the core that the frontend has changed the + * active device subclass. SEE ALSO: retro_set_controller_port_device() + * + * The ascending input port indexes provided by the core in the struct + * are generally presented by frontends as ascending User # or Player #, + * such as Player 1, Player 2, Player 3, etc. Which device subclasses are + * supported can vary per input port. + * + * The first inner element of each entry in the retro_controller_info array + * is a retro_controller_description struct that specifies the names and + * codes of all device subclasses that are available for the corresponding + * User or Player, beginning with the generic Libretro device that the + * subclasses are derived from. The second inner element of each entry is the + * total number of subclasses that are listed in the retro_controller_description. + * + * NOTE: Even if special device types are set in the libretro core, + * libretro should only poll input based on the base input device types. + */ +#define RETRO_ENVIRONMENT_SET_MEMORY_MAPS (36 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_memory_map * -- + * This environment call lets a libretro core tell the frontend + * about the memory maps this core emulates. + * This can be used to implement, for example, cheats in a core-agnostic way. + * + * Should only be used by emulators; it doesn't make much sense for + * anything else. + * It is recommended to expose all relevant pointers through + * retro_get_memory_* as well. + */ +#define RETRO_ENVIRONMENT_SET_GEOMETRY 37 + /* const struct retro_game_geometry * -- + * This environment call is similar to SET_SYSTEM_AV_INFO for changing + * video parameters, but provides a guarantee that drivers will not be + * reinitialized. + * This can only be called from within retro_run(). + * + * The purpose of this call is to allow a core to alter nominal + * width/heights as well as aspect ratios on-the-fly, which can be + * useful for some emulators to change in run-time. + * + * max_width/max_height arguments are ignored and cannot be changed + * with this call as this could potentially require a reinitialization or a + * non-constant time operation. + * If max_width/max_height are to be changed, SET_SYSTEM_AV_INFO is required. + * + * A frontend must guarantee that this environment call completes in + * constant time. + */ +#define RETRO_ENVIRONMENT_GET_USERNAME 38 + /* const char ** + * Returns the specified username of the frontend, if specified by the user. + * This username can be used as a nickname for a core that has online facilities + * or any other mode where personalization of the user is desirable. + * The returned value can be NULL. + * If this environ callback is used by a core that requires a valid username, + * a default username should be specified by the core. + */ +#define RETRO_ENVIRONMENT_GET_LANGUAGE 39 + /* unsigned * -- + * Returns the specified language of the frontend, if specified by the user. + * It can be used by the core for localization purposes. + */ +#define RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER (40 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_framebuffer * -- + * Returns a preallocated framebuffer which the core can use for rendering + * the frame into when not using SET_HW_RENDER. + * The framebuffer returned from this call must not be used + * after the current call to retro_run() returns. + * + * The goal of this call is to allow zero-copy behavior where a core + * can render directly into video memory, avoiding extra bandwidth cost by copying + * memory from core to video memory. + * + * If this call succeeds and the core renders into it, + * the framebuffer pointer and pitch can be passed to retro_video_refresh_t. + * If the buffer from GET_CURRENT_SOFTWARE_FRAMEBUFFER is to be used, + * the core must pass the exact + * same pointer as returned by GET_CURRENT_SOFTWARE_FRAMEBUFFER; + * i.e. passing a pointer which is offset from the + * buffer is undefined. The width, height and pitch parameters + * must also match exactly to the values obtained from GET_CURRENT_SOFTWARE_FRAMEBUFFER. + * + * It is possible for a frontend to return a different pixel format + * than the one used in SET_PIXEL_FORMAT. This can happen if the frontend + * needs to perform conversion. + * + * It is still valid for a core to render to a different buffer + * even if GET_CURRENT_SOFTWARE_FRAMEBUFFER succeeds. + * + * A frontend must make sure that the pointer obtained from this function is + * writeable (and readable). + */ +#define RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE (41 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_interface ** -- + * Returns an API specific rendering interface for accessing API specific data. + * Not all HW rendering APIs support or need this. + * The contents of the returned pointer is specific to the rendering API + * being used. See the various headers like libretro_vulkan.h, etc. + * + * GET_HW_RENDER_INTERFACE cannot be called before context_reset has been called. + * Similarly, after context_destroyed callback returns, + * the contents of the HW_RENDER_INTERFACE are invalidated. + */ +#define RETRO_ENVIRONMENT_SET_SUPPORT_ACHIEVEMENTS (42 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const bool * -- + * If true, the libretro implementation supports achievements + * either via memory descriptors set with RETRO_ENVIRONMENT_SET_MEMORY_MAPS + * or via retro_get_memory_data/retro_get_memory_size. + * + * This must be called before the first call to retro_run. + */ +#define RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE (43 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* const struct retro_hw_render_context_negotiation_interface * -- + * Sets an interface which lets the libretro core negotiate with frontend how a context is created. + * The semantics of this interface depends on which API is used in SET_HW_RENDER earlier. + * This interface will be used when the frontend is trying to create a HW rendering context, + * so it will be used after SET_HW_RENDER, but before the context_reset callback. + */ +#define RETRO_ENVIRONMENT_SET_SERIALIZATION_QUIRKS 44 + /* uint64_t * -- + * Sets quirk flags associated with serialization. The frontend will zero any flags it doesn't + * recognize or support. Should be set in either retro_init or retro_load_game, but not both. + */ +#define RETRO_ENVIRONMENT_SET_HW_SHARED_CONTEXT (44 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* N/A (null) * -- + * The frontend will try to use a 'shared' hardware context (mostly applicable + * to OpenGL) when a hardware context is being set up. + * + * Returns true if the frontend supports shared hardware contexts and false + * if the frontend does not support shared hardware contexts. + * + * This will do nothing on its own until SET_HW_RENDER env callbacks are + * being used. + */ +#define RETRO_ENVIRONMENT_GET_VFS_INTERFACE (45 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_vfs_interface_info * -- + * Gets access to the VFS interface. + * VFS presence needs to be queried prior to load_game or any + * get_system/save/other_directory being called to let front end know + * core supports VFS before it starts handing out paths. + * It is recomended to do so in retro_set_environment + */ +#define RETRO_ENVIRONMENT_GET_LED_INTERFACE (46 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_led_interface * -- + * Gets an interface which is used by a libretro core to set + * state of LEDs. + */ +#define RETRO_ENVIRONMENT_GET_AUDIO_VIDEO_ENABLE (47 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* int * -- + * Tells the core if the frontend wants audio or video. + * If disabled, the frontend will discard the audio or video, + * so the core may decide to skip generating a frame or generating audio. + * This is mainly used for increasing performance. + * Bit 0 (value 1): Enable Video + * Bit 1 (value 2): Enable Audio + * Bit 2 (value 4): Use Fast Savestates. + * Bit 3 (value 8): Hard Disable Audio + * Other bits are reserved for future use and will default to zero. + * If video is disabled: + * * The frontend wants the core to not generate any video, + * including presenting frames via hardware acceleration. + * * The frontend's video frame callback will do nothing. + * * After running the frame, the video output of the next frame should be + * no different than if video was enabled, and saving and loading state + * should have no issues. + * If audio is disabled: + * * The frontend wants the core to not generate any audio. + * * The frontend's audio callbacks will do nothing. + * * After running the frame, the audio output of the next frame should be + * no different than if audio was enabled, and saving and loading state + * should have no issues. + * Fast Savestates: + * * Guaranteed to be created by the same binary that will load them. + * * Will not be written to or read from the disk. + * * Suggest that the core assumes loading state will succeed. + * * Suggest that the core updates its memory buffers in-place if possible. + * * Suggest that the core skips clearing memory. + * * Suggest that the core skips resetting the system. + * * Suggest that the core may skip validation steps. + * Hard Disable Audio: + * * Used for a secondary core when running ahead. + * * Indicates that the frontend will never need audio from the core. + * * Suggests that the core may stop synthesizing audio, but this should not + * compromise emulation accuracy. + * * Audio output for the next frame does not matter, and the frontend will + * never need an accurate audio state in the future. + * * State will never be saved when using Hard Disable Audio. + */ +#define RETRO_ENVIRONMENT_GET_MIDI_INTERFACE (48 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_midi_interface ** -- + * Returns a MIDI interface that can be used for raw data I/O. + */ + +#define RETRO_ENVIRONMENT_GET_FASTFORWARDING (49 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* bool * -- + * Boolean value that indicates whether or not the frontend is in + * fastforwarding mode. + */ + +#define RETRO_ENVIRONMENT_GET_TARGET_REFRESH_RATE (50 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* float * -- + * Float value that lets us know what target refresh rate + * is curently in use by the frontend. + * + * The core can use the returned value to set an ideal + * refresh rate/framerate. + */ + +#define RETRO_ENVIRONMENT_GET_INPUT_BITMASKS (51 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* bool * -- + * Boolean value that indicates whether or not the frontend supports + * input bitmasks being returned by retro_input_state_t. The advantage + * of this is that retro_input_state_t has to be only called once to + * grab all button states instead of multiple times. + * + * If it returns true, you can pass RETRO_DEVICE_ID_JOYPAD_MASK as 'id' + * to retro_input_state_t (make sure 'device' is set to RETRO_DEVICE_JOYPAD). + * It will return a bitmask of all the digital buttons. + */ + +#define RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION 52 + /* unsigned * -- + * Unsigned value is the API version number of the core options + * interface supported by the frontend. If callback return false, + * API version is assumed to be 0. + * + * In legacy code, core options are set by passing an array of + * retro_variable structs to RETRO_ENVIRONMENT_SET_VARIABLES. + * This may be still be done regardless of the core options + * interface version. + * + * If version is >= 1 however, core options may instead be set by + * passing an array of retro_core_option_definition structs to + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS, or a 2D array of + * retro_core_option_definition structs to RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL. + * This allows the core to additionally set option sublabel information + * and/or provide localisation support. + * + * If version is >= 2, core options may instead be set by passing + * a retro_core_options_v2 struct to RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2, + * or an array of retro_core_options_v2 structs to + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL. This allows the core + * to additionally set optional core option category information + * for frontends with core option category support. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS 53 + /* const struct retro_core_option_definition ** -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 1. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * 'data' points to an array of retro_core_option_definition structs + * terminated by a { NULL, NULL, NULL, {{0}}, NULL } element. + * retro_core_option_definition::key should be namespaced to not collide + * with other implementations' keys. e.g. A core called + * 'foo' should use keys named as 'foo_option'. + * retro_core_option_definition::desc should contain a human readable + * description of the key. + * retro_core_option_definition::info should contain any additional human + * readable information text that a typical user may need to + * understand the functionality of the option. + * retro_core_option_definition::values is an array of retro_core_option_value + * structs terminated by a { NULL, NULL } element. + * > retro_core_option_definition::values[index].value is an expected option + * value. + * > retro_core_option_definition::values[index].label is a human readable + * label used when displaying the value on screen. If NULL, + * the value itself is used. + * retro_core_option_definition::default_value is the default core option + * setting. It must match one of the expected option values in the + * retro_core_option_definition::values array. If it does not, or the + * default value is NULL, the first entry in the + * retro_core_option_definition::values array is treated as the default. + * + * The number of possible option values should be very limited, + * and must be less than RETRO_NUM_CORE_OPTION_VALUES_MAX. + * i.e. it should be feasible to cycle through options + * without a keyboard. + * + * Example entry: + * { + * "foo_option", + * "Speed hack coprocessor X", + * "Provides increased performance at the expense of reduced accuracy", + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL 54 + /* const struct retro_core_options_intl * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 1. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * + * This is fundamentally the same as RETRO_ENVIRONMENT_SET_CORE_OPTIONS, + * with the addition of localisation support. The description of the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS callback should be consulted + * for further details. + * + * 'data' points to a retro_core_options_intl struct. + * + * retro_core_options_intl::us is a pointer to an array of + * retro_core_option_definition structs defining the US English + * core options implementation. It must point to a valid array. + * + * retro_core_options_intl::local is a pointer to an array of + * retro_core_option_definition structs defining core options for + * the current frontend language. It may be NULL (in which case + * retro_core_options_intl::us is used by the frontend). Any items + * missing from this array will be read from retro_core_options_intl::us + * instead. + * + * NOTE: Default core option values are always taken from the + * retro_core_options_intl::us array. Any default values in + * retro_core_options_intl::local array will be ignored. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY 55 + /* struct retro_core_option_display * -- + * + * Allows an implementation to signal the environment to show + * or hide a variable when displaying core options. This is + * considered a *suggestion*. The frontend is free to ignore + * this callback, and its implementation not considered mandatory. + * + * 'data' points to a retro_core_option_display struct + * + * retro_core_option_display::key is a variable identifier + * which has already been set by SET_VARIABLES/SET_CORE_OPTIONS. + * + * retro_core_option_display::visible is a boolean, specifying + * whether variable should be displayed + * + * Note that all core option variables will be set visible by + * default when calling SET_VARIABLES/SET_CORE_OPTIONS. + */ + +#define RETRO_ENVIRONMENT_GET_PREFERRED_HW_RENDER 56 + /* unsigned * -- + * + * Allows an implementation to ask frontend preferred hardware + * context to use. Core should use this information to deal + * with what specific context to request with SET_HW_RENDER. + * + * 'data' points to an unsigned variable + */ + +#define RETRO_ENVIRONMENT_GET_DISK_CONTROL_INTERFACE_VERSION 57 + /* unsigned * -- + * Unsigned value is the API version number of the disk control + * interface supported by the frontend. If callback return false, + * API version is assumed to be 0. + * + * In legacy code, the disk control interface is defined by passing + * a struct of type retro_disk_control_callback to + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. + * This may be still be done regardless of the disk control + * interface version. + * + * If version is >= 1 however, the disk control interface may + * instead be defined by passing a struct of type + * retro_disk_control_ext_callback to + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE. + * This allows the core to provide additional information about + * disk images to the frontend and/or enables extra + * disk control functionality by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE 58 + /* const struct retro_disk_control_ext_callback * -- + * Sets an interface which frontend can use to eject and insert + * disk images, and also obtain information about individual + * disk image files registered by the core. + * This is used for games which consist of multiple images and + * must be manually swapped out by the user (e.g. PSX, floppy disk + * based systems). + */ + +#define RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION 59 + /* unsigned * -- + * Unsigned value is the API version number of the message + * interface supported by the frontend. If callback returns + * false, API version is assumed to be 0. + * + * In legacy code, messages may be displayed in an + * implementation-specific manner by passing a struct + * of type retro_message to RETRO_ENVIRONMENT_SET_MESSAGE. + * This may be still be done regardless of the message + * interface version. + * + * If version is >= 1 however, messages may instead be + * displayed by passing a struct of type retro_message_ext + * to RETRO_ENVIRONMENT_SET_MESSAGE_EXT. This allows the + * core to specify message logging level, priority and + * destination (OSD, logging interface or both). + */ + +#define RETRO_ENVIRONMENT_SET_MESSAGE_EXT 60 + /* const struct retro_message_ext * -- + * Sets a message to be displayed in an implementation-specific + * manner for a certain amount of 'frames'. Additionally allows + * the core to specify message logging level, priority and + * destination (OSD, logging interface or both). + * Should not be used for trivial messages, which should simply be + * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a + * fallback, stderr). + */ + +#define RETRO_ENVIRONMENT_GET_INPUT_MAX_USERS 61 + /* unsigned * -- + * Unsigned value is the number of active input devices + * provided by the frontend. This may change between + * frames, but will remain constant for the duration + * of each frame. + * If callback returns true, a core need not poll any + * input device with an index greater than or equal to + * the number of active devices. + * If callback returns false, the number of active input + * devices is unknown. In this case, all input devices + * should be considered active. + */ + +#define RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK 62 + /* const struct retro_audio_buffer_status_callback * -- + * Lets the core know the occupancy level of the frontend + * audio buffer. Can be used by a core to attempt frame + * skipping in order to avoid buffer under-runs. + * A core may pass NULL to disable buffer status reporting + * in the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_MINIMUM_AUDIO_LATENCY 63 + /* const unsigned * -- + * Sets minimum frontend audio latency in milliseconds. + * Resultant audio latency may be larger than set value, + * or smaller if a hardware limit is encountered. A frontend + * is expected to honour requests up to 512 ms. + * + * - If value is less than current frontend + * audio latency, callback has no effect + * - If value is zero, default frontend audio + * latency is set + * + * May be used by a core to increase audio latency and + * therefore decrease the probability of buffer under-runs + * (crackling) when performing 'intensive' operations. + * A core utilising RETRO_ENVIRONMENT_SET_AUDIO_BUFFER_STATUS_CALLBACK + * to implement audio-buffer-based frame skipping may achieve + * optimal results by setting the audio latency to a 'high' + * (typically 6x or 8x) integer multiple of the expected + * frame time. + * + * WARNING: This can only be called from within retro_run(). + * Calling this can require a full reinitialization of audio + * drivers in the frontend, so it is important to call it very + * sparingly, and usually only with the users explicit consent. + * An eventual driver reinitialize will happen so that audio + * callbacks happening after this call within the same retro_run() + * call will target the newly initialized driver. + */ + +#define RETRO_ENVIRONMENT_SET_FASTFORWARDING_OVERRIDE 64 + /* const struct retro_fastforwarding_override * -- + * Used by a libretro core to override the current + * fastforwarding mode of the frontend. + * If NULL is passed to this function, the frontend + * will return true if fastforwarding override + * functionality is supported (no change in + * fastforwarding state will occur in this case). + */ + +#define RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE 65 + /* const struct retro_system_content_info_override * -- + * Allows an implementation to override 'global' content + * info parameters reported by retro_get_system_info(). + * Overrides also affect subsystem content info parameters + * set via RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO. + * This function must be called inside retro_set_environment(). + * If callback returns false, content info overrides + * are unsupported by the frontend, and will be ignored. + * If callback returns true, extended game info may be + * retrieved by calling RETRO_ENVIRONMENT_GET_GAME_INFO_EXT + * in retro_load_game() or retro_load_game_special(). + * + * 'data' points to an array of retro_system_content_info_override + * structs terminated by a { NULL, false, false } element. + * If 'data' is NULL, no changes will be made to the frontend; + * a core may therefore pass NULL in order to test whether + * the RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE and + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT callbacks are supported + * by the frontend. + * + * For struct member descriptions, see the definition of + * struct retro_system_content_info_override. + * + * Example: + * + * - struct retro_system_info: + * { + * "My Core", // library_name + * "v1.0", // library_version + * "m3u|md|cue|iso|chd|sms|gg|sg", // valid_extensions + * true, // need_fullpath + * false // block_extract + * } + * + * - Array of struct retro_system_content_info_override: + * { + * { + * "md|sms|gg", // extensions + * false, // need_fullpath + * true // persistent_data + * }, + * { + * "sg", // extensions + * false, // need_fullpath + * false // persistent_data + * }, + * { NULL, false, false } + * } + * + * Result: + * - Files of type m3u, cue, iso, chd will not be + * loaded by the frontend. Frontend will pass a + * valid path to the core, and core will handle + * loading internally + * - Files of type md, sms, gg will be loaded by + * the frontend. A valid memory buffer will be + * passed to the core. This memory buffer will + * remain valid until retro_deinit() returns + * - Files of type sg will be loaded by the frontend. + * A valid memory buffer will be passed to the core. + * This memory buffer will remain valid until + * retro_load_game() (or retro_load_game_special()) + * returns + * + * NOTE: If an extension is listed multiple times in + * an array of retro_system_content_info_override + * structs, only the first instance will be registered + */ + +#define RETRO_ENVIRONMENT_GET_GAME_INFO_EXT 66 + /* const struct retro_game_info_ext ** -- + * Allows an implementation to fetch extended game + * information, providing additional content path + * and memory buffer status details. + * This function may only be called inside + * retro_load_game() or retro_load_game_special(). + * If callback returns false, extended game information + * is unsupported by the frontend. In this case, only + * regular retro_game_info will be available. + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT is guaranteed + * to return true if RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE + * returns true. + * + * 'data' points to an array of retro_game_info_ext structs. + * + * For struct member descriptions, see the definition of + * struct retro_game_info_ext. + * + * - If function is called inside retro_load_game(), + * the retro_game_info_ext array is guaranteed to + * have a size of 1 - i.e. the returned pointer may + * be used to access directly the members of the + * first retro_game_info_ext struct, for example: + * + * struct retro_game_info_ext *game_info_ext; + * if (environ_cb(RETRO_ENVIRONMENT_GET_GAME_INFO_EXT, &game_info_ext)) + * printf("Content Directory: %s\n", game_info_ext->dir); + * + * - If the function is called inside retro_load_game_special(), + * the retro_game_info_ext array is guaranteed to have a + * size equal to the num_info argument passed to + * retro_load_game_special() + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 67 + /* const struct retro_core_options_v2 * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 2. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * If RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION returns an API + * version of >= 2, this callback is guaranteed to succeed + * (i.e. callback return value does not indicate success) + * If callback returns true, frontend has core option category + * support. + * If callback returns false, frontend does not have core option + * category support. + * + * 'data' points to a retro_core_options_v2 struct, containing + * of two pointers: + * - retro_core_options_v2::categories is an array of + * retro_core_option_v2_category structs terminated by a + * { NULL, NULL, NULL } element. If retro_core_options_v2::categories + * is NULL, all core options will have no category and will be shown + * at the top level of the frontend core option interface. If frontend + * does not have core option category support, categories array will + * be ignored. + * - retro_core_options_v2::definitions is an array of + * retro_core_option_v2_definition structs terminated by a + * { NULL, NULL, NULL, NULL, NULL, NULL, {{0}}, NULL } + * element. + * + * >> retro_core_option_v2_category notes: + * + * - retro_core_option_v2_category::key should contain string + * that uniquely identifies the core option category. Valid + * key characters are [a-z, A-Z, 0-9, _, -] + * Namespace collisions with other implementations' category + * keys are permitted. + * - retro_core_option_v2_category::desc should contain a human + * readable description of the category key. + * - retro_core_option_v2_category::info should contain any + * additional human readable information text that a typical + * user may need to understand the nature of the core option + * category. + * + * Example entry: + * { + * "advanced_settings", + * "Advanced", + * "Options affecting low-level emulation performance and accuracy." + * } + * + * >> retro_core_option_v2_definition notes: + * + * - retro_core_option_v2_definition::key should be namespaced to not + * collide with other implementations' keys. e.g. A core called + * 'foo' should use keys named as 'foo_option'. Valid key characters + * are [a-z, A-Z, 0-9, _, -]. + * - retro_core_option_v2_definition::desc should contain a human readable + * description of the key. Will be used when the frontend does not + * have core option category support. Examples: "Aspect Ratio" or + * "Video > Aspect Ratio". + * - retro_core_option_v2_definition::desc_categorized should contain a + * human readable description of the key, which will be used when + * frontend has core option category support. Example: "Aspect Ratio", + * where associated retro_core_option_v2_category::desc is "Video". + * If empty or NULL, the string specified by + * retro_core_option_v2_definition::desc will be used instead. + * retro_core_option_v2_definition::desc_categorized will be ignored + * if retro_core_option_v2_definition::category_key is empty or NULL. + * - retro_core_option_v2_definition::info should contain any additional + * human readable information text that a typical user may need to + * understand the functionality of the option. + * - retro_core_option_v2_definition::info_categorized should contain + * any additional human readable information text that a typical user + * may need to understand the functionality of the option, and will be + * used when frontend has core option category support. This is provided + * to accommodate the case where info text references an option by + * name/desc, and the desc/desc_categorized text for that option differ. + * If empty or NULL, the string specified by + * retro_core_option_v2_definition::info will be used instead. + * retro_core_option_v2_definition::info_categorized will be ignored + * if retro_core_option_v2_definition::category_key is empty or NULL. + * - retro_core_option_v2_definition::category_key should contain a + * category identifier (e.g. "video" or "audio") that will be + * assigned to the core option if frontend has core option category + * support. A categorized option will be shown in a subsection/ + * submenu of the frontend core option interface. If key is empty + * or NULL, or if key does not match one of the + * retro_core_option_v2_category::key values in the associated + * retro_core_option_v2_category array, option will have no category + * and will be shown at the top level of the frontend core option + * interface. + * - retro_core_option_v2_definition::values is an array of + * retro_core_option_value structs terminated by a { NULL, NULL } + * element. + * --> retro_core_option_v2_definition::values[index].value is an + * expected option value. + * --> retro_core_option_v2_definition::values[index].label is a + * human readable label used when displaying the value on screen. + * If NULL, the value itself is used. + * - retro_core_option_v2_definition::default_value is the default + * core option setting. It must match one of the expected option + * values in the retro_core_option_v2_definition::values array. If + * it does not, or the default value is NULL, the first entry in the + * retro_core_option_v2_definition::values array is treated as the + * default. + * + * The number of possible option values should be very limited, + * and must be less than RETRO_NUM_CORE_OPTION_VALUES_MAX. + * i.e. it should be feasible to cycle through options + * without a keyboard. + * + * Example entries: + * + * - Uncategorized: + * + * { + * "foo_option", + * "Speed hack coprocessor X", + * NULL, + * "Provides increased performance at the expense of reduced accuracy.", + * NULL, + * NULL, + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * - Categorized: + * + * { + * "foo_option", + * "Advanced > Speed hack coprocessor X", + * "Speed hack coprocessor X", + * "Setting 'Advanced > Speed hack coprocessor X' to 'true' or 'Turbo' provides increased performance at the expense of reduced accuracy", + * "Setting 'Speed hack coprocessor X' to 'true' or 'Turbo' provides increased performance at the expense of reduced accuracy", + * "advanced_settings", + * { + * { "false", NULL }, + * { "true", NULL }, + * { "unstable", "Turbo (Unstable)" }, + * { NULL, NULL }, + * }, + * "false" + * } + * + * Only strings are operated on. The possible values will + * generally be displayed and stored as-is by the frontend. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL 68 + /* const struct retro_core_options_v2_intl * -- + * Allows an implementation to signal the environment + * which variables it might want to check for later using + * GET_VARIABLE. + * This allows the frontend to present these variables to + * a user dynamically. + * This should only be called if RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION + * returns an API version of >= 2. + * This should be called instead of RETRO_ENVIRONMENT_SET_VARIABLES. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL. + * This should be called instead of RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2. + * This should be called the first time as early as + * possible (ideally in retro_set_environment). + * Afterwards it may be called again for the core to communicate + * updated options to the frontend, but the number of core + * options must not change from the number in the initial call. + * If RETRO_ENVIRONMENT_GET_CORE_OPTIONS_VERSION returns an API + * version of >= 2, this callback is guaranteed to succeed + * (i.e. callback return value does not indicate success) + * If callback returns true, frontend has core option category + * support. + * If callback returns false, frontend does not have core option + * category support. + * + * This is fundamentally the same as RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2, + * with the addition of localisation support. The description of the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 callback should be consulted + * for further details. + * + * 'data' points to a retro_core_options_v2_intl struct. + * + * - retro_core_options_v2_intl::us is a pointer to a + * retro_core_options_v2 struct defining the US English + * core options implementation. It must point to a valid struct. + * + * - retro_core_options_v2_intl::local is a pointer to a + * retro_core_options_v2 struct defining core options for + * the current frontend language. It may be NULL (in which case + * retro_core_options_v2_intl::us is used by the frontend). Any items + * missing from this struct will be read from + * retro_core_options_v2_intl::us instead. + * + * NOTE: Default core option values are always taken from the + * retro_core_options_v2_intl::us struct. Any default values in + * the retro_core_options_v2_intl::local struct will be ignored. + */ + +#define RETRO_ENVIRONMENT_SET_CORE_OPTIONS_UPDATE_DISPLAY_CALLBACK 69 + /* const struct retro_core_options_update_display_callback * -- + * Allows a frontend to signal that a core must update + * the visibility of any dynamically hidden core options, + * and enables the frontend to detect visibility changes. + * Used by the frontend to update the menu display status + * of core options without requiring a call of retro_run(). + * Must be called in retro_set_environment(). + */ + +#define RETRO_ENVIRONMENT_SET_VARIABLE 70 + /* const struct retro_variable * -- + * Allows an implementation to notify the frontend + * that a core option value has changed. + * + * retro_variable::key and retro_variable::value + * must match strings that have been set previously + * via one of the following: + * + * - RETRO_ENVIRONMENT_SET_VARIABLES + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_INTL + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2 + * - RETRO_ENVIRONMENT_SET_CORE_OPTIONS_V2_INTL + * + * After changing a core option value via this + * callback, RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE + * will return true. + * + * If data is NULL, no changes will be registered + * and the callback will return true; an + * implementation may therefore pass NULL in order + * to test whether the callback is supported. + */ + +#define RETRO_ENVIRONMENT_GET_THROTTLE_STATE (71 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_throttle_state * -- + * Allows an implementation to get details on the actual rate + * the frontend is attempting to call retro_run(). + */ + +#define RETRO_ENVIRONMENT_GET_SAVESTATE_CONTEXT (72 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* int * -- + * Tells the core about the context the frontend is asking for savestate. + * (see enum retro_savestate_context) + */ + +#define RETRO_ENVIRONMENT_GET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_SUPPORT (73 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_hw_render_context_negotiation_interface * -- + * Before calling SET_HW_RNEDER_CONTEXT_NEGOTIATION_INTERFACE, a core can query + * which version of the interface is supported. + * + * Frontend looks at interface_type and returns the maximum supported + * context negotiation interface version. + * If the interface_type is not supported or recognized by the frontend, a version of 0 + * must be returned in interface_version and true is returned by frontend. + * + * If this environment call returns true with interface_version greater than 0, + * a core can always use a negotiation interface version larger than what the frontend returns, but only + * earlier versions of the interface will be used by the frontend. + * A frontend must not reject a negotiation interface version that is larger than + * what the frontend supports. Instead, the frontend will use the older entry points that it recognizes. + * If this is incompatible with a particular core's requirements, it can error out early. + * + * Backwards compatibility note: + * This environment call was introduced after Vulkan v1 context negotiation. + * If this environment call is not supported by frontend - i.e. the environment call returns false - + * only Vulkan v1 context negotiation is supported (if Vulkan HW rendering is supported at all). + * If a core uses Vulkan negotiation interface with version > 1, negotiation may fail unexpectedly. + * All future updates to the context negotiation interface implies that frontend must support + * this environment call to query support. + */ + +#define RETRO_ENVIRONMENT_GET_JIT_CAPABLE 74 + /* bool * -- + * Result is set to true if the frontend has already verified JIT can be + * used, mainly for use iOS/tvOS. On other platforms the result is true. + */ + +#define RETRO_ENVIRONMENT_GET_MICROPHONE_INTERFACE (75 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_microphone_interface * -- + * Returns an interface that can be used to receive input from the microphone driver. + * + * Returns true if microphone support is available, + * even if no microphones are plugged in. + * Returns false if mic support is disabled or unavailable. + * + * This callback can be invoked at any time, + * even before the microphone driver is ready. + */ + +#define RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE 76 + /* const struct retro_netpacket_callback * -- + * When set, a core gains control over network packets sent and + * received during a multiplayer session. This can be used to + * emulate multiplayer games that were originally played on two + * or more separate consoles or computers connected together. + * + * The frontend will take care of connecting players together, + * and the core only needs to send the actual data as needed for + * the emulation, while handshake and connection management happen + * in the background. + * + * When two or more players are connected and this interface has + * been set, time manipulation features (such as pausing, slow motion, + * fast forward, rewinding, save state loading, etc.) are disabled to + * avoid interrupting communication. + * + * Should be set in either retro_init or retro_load_game, but not both. + * + * When not set, a frontend may use state serialization-based + * multiplayer, where a deterministic core supporting multiple + * input devices does not need to take any action on its own. + */ + +#define RETRO_ENVIRONMENT_GET_DEVICE_POWER (77 | RETRO_ENVIRONMENT_EXPERIMENTAL) + /* struct retro_device_power * -- + * Returns the device's current power state as reported by the frontend. + * This is useful for emulating the battery level in handheld consoles, + * or for reducing power consumption when on battery power. + * + * The return value indicates whether the frontend can provide this information, + * even if the parameter is NULL. + * + * If the frontend does not support this functionality, + * then the provided argument will remain unchanged. + * + * Note that this environment call describes the power state for the entire device, + * not for individual peripherals like controllers. + */ + +/* VFS functionality */ + +/* File paths: + * File paths passed as parameters when using this API shall be well formed UNIX-style, + * using "/" (unquoted forward slash) as directory separator regardless of the platform's native separator. + * Paths shall also include at least one forward slash ("game.bin" is an invalid path, use "./game.bin" instead). + * Other than the directory separator, cores shall not make assumptions about path format: + * "C:/path/game.bin", "http://example.com/game.bin", "#game/game.bin", "./game.bin" (without quotes) are all valid paths. + * Cores may replace the basename or remove path components from the end, and/or add new components; + * however, cores shall not append "./", "../" or multiple consecutive forward slashes ("//") to paths they request to front end. + * The frontend is encouraged to make such paths work as well as it can, but is allowed to give up if the core alters paths too much. + * Frontends are encouraged, but not required, to support native file system paths (modulo replacing the directory separator, if applicable). + * Cores are allowed to try using them, but must remain functional if the front rejects such requests. + * Cores are encouraged to use the libretro-common filestream functions for file I/O, + * as they seamlessly integrate with VFS, deal with directory separator replacement as appropriate + * and provide platform-specific fallbacks in cases where front ends do not support VFS. */ + +/* Opaque file handle + * Introduced in VFS API v1 */ +struct retro_vfs_file_handle; + +/* Opaque directory handle + * Introduced in VFS API v3 */ +struct retro_vfs_dir_handle; + +/* File open flags + * Introduced in VFS API v1 */ +#define RETRO_VFS_FILE_ACCESS_READ (1 << 0) /* Read only mode */ +#define RETRO_VFS_FILE_ACCESS_WRITE (1 << 1) /* Write only mode, discard contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE is also specified */ +#define RETRO_VFS_FILE_ACCESS_READ_WRITE (RETRO_VFS_FILE_ACCESS_READ | RETRO_VFS_FILE_ACCESS_WRITE) /* Read-write mode, discard contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE is also specified*/ +#define RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING (1 << 2) /* Prevents discarding content of existing files opened for writing */ + +/* These are only hints. The frontend may choose to ignore them. Other than RAM/CPU/etc use, + and how they react to unlikely external interference (for example someone else writing to that file, + or the file's server going down), behavior will not change. */ +#define RETRO_VFS_FILE_ACCESS_HINT_NONE (0) +/* Indicate that the file will be accessed many times. The frontend should aggressively cache everything. */ +#define RETRO_VFS_FILE_ACCESS_HINT_FREQUENT_ACCESS (1 << 0) + +/* Seek positions */ +#define RETRO_VFS_SEEK_POSITION_START 0 +#define RETRO_VFS_SEEK_POSITION_CURRENT 1 +#define RETRO_VFS_SEEK_POSITION_END 2 + +/* stat() result flags + * Introduced in VFS API v3 */ +#define RETRO_VFS_STAT_IS_VALID (1 << 0) +#define RETRO_VFS_STAT_IS_DIRECTORY (1 << 1) +#define RETRO_VFS_STAT_IS_CHARACTER_SPECIAL (1 << 2) + +/* Get path from opaque handle. Returns the exact same path passed to file_open when getting the handle + * Introduced in VFS API v1 */ +typedef const char *(RETRO_CALLCONV *retro_vfs_get_path_t)(struct retro_vfs_file_handle *stream); + +/* Open a file for reading or writing. If path points to a directory, this will + * fail. Returns the opaque file handle, or NULL for error. + * Introduced in VFS API v1 */ +typedef struct retro_vfs_file_handle *(RETRO_CALLCONV *retro_vfs_open_t)(const char *path, unsigned mode, unsigned hints); + +/* Close the file and release its resources. Must be called if open_file returns non-NULL. Returns 0 on success, -1 on failure. + * Whether the call succeeds ot not, the handle passed as parameter becomes invalid and should no longer be used. + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_close_t)(struct retro_vfs_file_handle *stream); + +/* Return the size of the file in bytes, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_size_t)(struct retro_vfs_file_handle *stream); + +/* Truncate file to specified size. Returns 0 on success or -1 on error + * Introduced in VFS API v2 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_truncate_t)(struct retro_vfs_file_handle *stream, int64_t length); + +/* Get the current read / write position for the file. Returns -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_tell_t)(struct retro_vfs_file_handle *stream); + +/* Set the current read/write position for the file. Returns the new position, -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_seek_t)(struct retro_vfs_file_handle *stream, int64_t offset, int seek_position); + +/* Read data from a file. Returns the number of bytes read, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_read_t)(struct retro_vfs_file_handle *stream, void *s, uint64_t len); + +/* Write data to a file. Returns the number of bytes written, or -1 for error. + * Introduced in VFS API v1 */ +typedef int64_t (RETRO_CALLCONV *retro_vfs_write_t)(struct retro_vfs_file_handle *stream, const void *s, uint64_t len); + +/* Flush pending writes to file, if using buffered IO. Returns 0 on sucess, or -1 on failure. + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_flush_t)(struct retro_vfs_file_handle *stream); + +/* Delete the specified file. Returns 0 on success, -1 on failure + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_remove_t)(const char *path); + +/* Rename the specified file. Returns 0 on success, -1 on failure + * Introduced in VFS API v1 */ +typedef int (RETRO_CALLCONV *retro_vfs_rename_t)(const char *old_path, const char *new_path); + +/* Stat the specified file. Retruns a bitmask of RETRO_VFS_STAT_* flags, none are set if path was not valid. + * Additionally stores file size in given variable, unless NULL is given. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_stat_t)(const char *path, int32_t *size); + +/* Create the specified directory. Returns 0 on success, -1 on unknown failure, -2 if already exists. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_mkdir_t)(const char *dir); + +/* Open the specified directory for listing. Returns the opaque dir handle, or NULL for error. + * Support for the include_hidden argument may vary depending on the platform. + * Introduced in VFS API v3 */ +typedef struct retro_vfs_dir_handle *(RETRO_CALLCONV *retro_vfs_opendir_t)(const char *dir, bool include_hidden); + +/* Read the directory entry at the current position, and move the read pointer to the next position. + * Returns true on success, false if already on the last entry. + * Introduced in VFS API v3 */ +typedef bool (RETRO_CALLCONV *retro_vfs_readdir_t)(struct retro_vfs_dir_handle *dirstream); + +/* Get the name of the last entry read. Returns a string on success, or NULL for error. + * The returned string pointer is valid until the next call to readdir or closedir. + * Introduced in VFS API v3 */ +typedef const char *(RETRO_CALLCONV *retro_vfs_dirent_get_name_t)(struct retro_vfs_dir_handle *dirstream); + +/* Check if the last entry read was a directory. Returns true if it was, false otherwise (or on error). + * Introduced in VFS API v3 */ +typedef bool (RETRO_CALLCONV *retro_vfs_dirent_is_dir_t)(struct retro_vfs_dir_handle *dirstream); + +/* Close the directory and release its resources. Must be called if opendir returns non-NULL. Returns 0 on success, -1 on failure. + * Whether the call succeeds ot not, the handle passed as parameter becomes invalid and should no longer be used. + * Introduced in VFS API v3 */ +typedef int (RETRO_CALLCONV *retro_vfs_closedir_t)(struct retro_vfs_dir_handle *dirstream); + +struct retro_vfs_interface +{ + /* VFS API v1 */ + retro_vfs_get_path_t get_path; + retro_vfs_open_t open; + retro_vfs_close_t close; + retro_vfs_size_t size; + retro_vfs_tell_t tell; + retro_vfs_seek_t seek; + retro_vfs_read_t read; + retro_vfs_write_t write; + retro_vfs_flush_t flush; + retro_vfs_remove_t remove; + retro_vfs_rename_t rename; + /* VFS API v2 */ + retro_vfs_truncate_t truncate; + /* VFS API v3 */ + retro_vfs_stat_t stat; + retro_vfs_mkdir_t mkdir; + retro_vfs_opendir_t opendir; + retro_vfs_readdir_t readdir; + retro_vfs_dirent_get_name_t dirent_get_name; + retro_vfs_dirent_is_dir_t dirent_is_dir; + retro_vfs_closedir_t closedir; +}; + +struct retro_vfs_interface_info +{ + /* Set by core: should this be higher than the version the front end supports, + * front end will return false in the RETRO_ENVIRONMENT_GET_VFS_INTERFACE call + * Introduced in VFS API v1 */ + uint32_t required_interface_version; + + /* Frontend writes interface pointer here. The frontend also sets the actual + * version, must be at least required_interface_version. + * Introduced in VFS API v1 */ + struct retro_vfs_interface *iface; +}; + +enum retro_hw_render_interface_type +{ + RETRO_HW_RENDER_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_INTERFACE_D3D9 = 1, + RETRO_HW_RENDER_INTERFACE_D3D10 = 2, + RETRO_HW_RENDER_INTERFACE_D3D11 = 3, + RETRO_HW_RENDER_INTERFACE_D3D12 = 4, + RETRO_HW_RENDER_INTERFACE_GSKIT_PS2 = 5, + RETRO_HW_RENDER_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_interface_* types + * contain at least these fields. */ +struct retro_hw_render_interface +{ + enum retro_hw_render_interface_type interface_type; + unsigned interface_version; +}; + +typedef void (RETRO_CALLCONV *retro_set_led_state_t)(int led, int state); +struct retro_led_interface +{ + retro_set_led_state_t set_led_state; +}; + +/* Retrieves the current state of the MIDI input. + * Returns true if it's enabled, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_input_enabled_t)(void); + +/* Retrieves the current state of the MIDI output. + * Returns true if it's enabled, false otherwise */ +typedef bool (RETRO_CALLCONV *retro_midi_output_enabled_t)(void); + +/* Reads next byte from the input stream. + * Returns true if byte is read, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_read_t)(uint8_t *byte); + +/* Writes byte to the output stream. + * 'delta_time' is in microseconds and represent time elapsed since previous write. + * Returns true if byte is written, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_write_t)(uint8_t byte, uint32_t delta_time); + +/* Flushes previously written data. + * Returns true if successful, false otherwise. */ +typedef bool (RETRO_CALLCONV *retro_midi_flush_t)(void); + +struct retro_midi_interface +{ + retro_midi_input_enabled_t input_enabled; + retro_midi_output_enabled_t output_enabled; + retro_midi_read_t read; + retro_midi_write_t write; + retro_midi_flush_t flush; +}; + +enum retro_hw_render_context_negotiation_interface_type +{ + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN = 0, + RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_DUMMY = INT_MAX +}; + +/* Base struct. All retro_hw_render_context_negotiation_interface_* types + * contain at least these fields. */ +struct retro_hw_render_context_negotiation_interface +{ + enum retro_hw_render_context_negotiation_interface_type interface_type; + unsigned interface_version; +}; + +/* Serialized state is incomplete in some way. Set if serialization is + * usable in typical end-user cases but should not be relied upon to + * implement frame-sensitive frontend features such as netplay or + * rerecording. */ +#define RETRO_SERIALIZATION_QUIRK_INCOMPLETE (1 << 0) +/* The core must spend some time initializing before serialization is + * supported. retro_serialize() will initially fail; retro_unserialize() + * and retro_serialize_size() may or may not work correctly either. */ +#define RETRO_SERIALIZATION_QUIRK_MUST_INITIALIZE (1 << 1) +/* Serialization size may change within a session. */ +#define RETRO_SERIALIZATION_QUIRK_CORE_VARIABLE_SIZE (1 << 2) +/* Set by the frontend to acknowledge that it supports variable-sized + * states. */ +#define RETRO_SERIALIZATION_QUIRK_FRONT_VARIABLE_SIZE (1 << 3) +/* Serialized state can only be loaded during the same session. */ +#define RETRO_SERIALIZATION_QUIRK_SINGLE_SESSION (1 << 4) +/* Serialized state cannot be loaded on an architecture with a different + * endianness from the one it was saved on. */ +#define RETRO_SERIALIZATION_QUIRK_ENDIAN_DEPENDENT (1 << 5) +/* Serialized state cannot be loaded on a different platform from the one it + * was saved on for reasons other than endianness, such as word size + * dependence */ +#define RETRO_SERIALIZATION_QUIRK_PLATFORM_DEPENDENT (1 << 6) + +#define RETRO_MEMDESC_CONST (1 << 0) /* The frontend will never change this memory area once retro_load_game has returned. */ +#define RETRO_MEMDESC_BIGENDIAN (1 << 1) /* The memory area contains big endian data. Default is little endian. */ +#define RETRO_MEMDESC_SYSTEM_RAM (1 << 2) /* The memory area is system RAM. This is main RAM of the gaming system. */ +#define RETRO_MEMDESC_SAVE_RAM (1 << 3) /* The memory area is save RAM. This RAM is usually found on a game cartridge, backed up by a battery. */ +#define RETRO_MEMDESC_VIDEO_RAM (1 << 4) /* The memory area is video RAM (VRAM) */ +#define RETRO_MEMDESC_ALIGN_2 (1 << 16) /* All memory access in this area is aligned to their own size, or 2, whichever is smaller. */ +#define RETRO_MEMDESC_ALIGN_4 (2 << 16) +#define RETRO_MEMDESC_ALIGN_8 (3 << 16) +#define RETRO_MEMDESC_MINSIZE_2 (1 << 24) /* All memory in this region is accessed at least 2 bytes at the time. */ +#define RETRO_MEMDESC_MINSIZE_4 (2 << 24) +#define RETRO_MEMDESC_MINSIZE_8 (3 << 24) +struct retro_memory_descriptor +{ + uint64_t flags; + + /* Pointer to the start of the relevant ROM or RAM chip. + * It's strongly recommended to use 'offset' if possible, rather than + * doing math on the pointer. + * + * If the same byte is mapped my multiple descriptors, their descriptors + * must have the same pointer. + * If 'start' does not point to the first byte in the pointer, put the + * difference in 'offset' instead. + * + * May be NULL if there's nothing usable here (e.g. hardware registers and + * open bus). No flags should be set if the pointer is NULL. + * It's recommended to minimize the number of descriptors if possible, + * but not mandatory. */ + void *ptr; + size_t offset; + + /* This is the location in the emulated address space + * where the mapping starts. */ + size_t start; + + /* Which bits must be same as in 'start' for this mapping to apply. + * The first memory descriptor to claim a certain byte is the one + * that applies. + * A bit which is set in 'start' must also be set in this. + * Can be zero, in which case each byte is assumed mapped exactly once. + * In this case, 'len' must be a power of two. */ + size_t select; + + /* If this is nonzero, the set bits are assumed not connected to the + * memory chip's address pins. */ + size_t disconnect; + + /* This one tells the size of the current memory area. + * If, after start+disconnect are applied, the address is higher than + * this, the highest bit of the address is cleared. + * + * If the address is still too high, the next highest bit is cleared. + * Can be zero, in which case it's assumed to be infinite (as limited + * by 'select' and 'disconnect'). */ + size_t len; + + /* To go from emulated address to physical address, the following + * order applies: + * Subtract 'start', pick off 'disconnect', apply 'len', add 'offset'. */ + + /* The address space name must consist of only a-zA-Z0-9_-, + * should be as short as feasible (maximum length is 8 plus the NUL), + * and may not be any other address space plus one or more 0-9A-F + * at the end. + * However, multiple memory descriptors for the same address space is + * allowed, and the address space name can be empty. NULL is treated + * as empty. + * + * Address space names are case sensitive, but avoid lowercase if possible. + * The same pointer may exist in multiple address spaces. + * + * Examples: + * blank+blank - valid (multiple things may be mapped in the same namespace) + * 'Sp'+'Sp' - valid (multiple things may be mapped in the same namespace) + * 'A'+'B' - valid (neither is a prefix of each other) + * 'S'+blank - valid ('S' is not in 0-9A-F) + * 'a'+blank - valid ('a' is not in 0-9A-F) + * 'a'+'A' - valid (neither is a prefix of each other) + * 'AR'+blank - valid ('R' is not in 0-9A-F) + * 'ARB'+blank - valid (the B can't be part of the address either, because + * there is no namespace 'AR') + * blank+'B' - not valid, because it's ambigous which address space B1234 + * would refer to. + * The length can't be used for that purpose; the frontend may want + * to append arbitrary data to an address, without a separator. */ + const char *addrspace; + + /* TODO: When finalizing this one, add a description field, which should be + * "WRAM" or something roughly equally long. */ + + /* TODO: When finalizing this one, replace 'select' with 'limit', which tells + * which bits can vary and still refer to the same address (limit = ~select). + * TODO: limit? range? vary? something else? */ + + /* TODO: When finalizing this one, if 'len' is above what 'select' (or + * 'limit') allows, it's bankswitched. Bankswitched data must have both 'len' + * and 'select' != 0, and the mappings don't tell how the system switches the + * banks. */ + + /* TODO: When finalizing this one, fix the 'len' bit removal order. + * For len=0x1800, pointer 0x1C00 should go to 0x1400, not 0x0C00. + * Algorithm: Take bits highest to lowest, but if it goes above len, clear + * the most recent addition and continue on the next bit. + * TODO: Can the above be optimized? Is "remove the lowest bit set in both + * pointer and 'len'" equivalent? */ + + /* TODO: Some emulators (MAME?) emulate big endian systems by only accessing + * the emulated memory in 32-bit chunks, native endian. But that's nothing + * compared to Darek Mihocka + * (section Emulation 103 - Nearly Free Byte Reversal) - he flips the ENTIRE + * RAM backwards! I'll want to represent both of those, via some flags. + * + * I suspect MAME either didn't think of that idea, or don't want the #ifdef. + * Not sure which, nor do I really care. */ + + /* TODO: Some of those flags are unused and/or don't really make sense. Clean + * them up. */ +}; + +/* The frontend may use the largest value of 'start'+'select' in a + * certain namespace to infer the size of the address space. + * + * If the address space is larger than that, a mapping with .ptr=NULL + * should be at the end of the array, with .select set to all ones for + * as long as the address space is big. + * + * Sample descriptors (minus .ptr, and RETRO_MEMFLAG_ on the flags): + * SNES WRAM: + * .start=0x7E0000, .len=0x20000 + * (Note that this must be mapped before the ROM in most cases; some of the + * ROM mappers + * try to claim $7E0000, or at least $7E8000.) + * SNES SPC700 RAM: + * .addrspace="S", .len=0x10000 + * SNES WRAM mirrors: + * .flags=MIRROR, .start=0x000000, .select=0xC0E000, .len=0x2000 + * .flags=MIRROR, .start=0x800000, .select=0xC0E000, .len=0x2000 + * SNES WRAM mirrors, alternate equivalent descriptor: + * .flags=MIRROR, .select=0x40E000, .disconnect=~0x1FFF + * (Various similar constructions can be created by combining parts of + * the above two.) + * SNES LoROM (512KB, mirrored a couple of times): + * .flags=CONST, .start=0x008000, .select=0x408000, .disconnect=0x8000, .len=512*1024 + * .flags=CONST, .start=0x400000, .select=0x400000, .disconnect=0x8000, .len=512*1024 + * SNES HiROM (4MB): + * .flags=CONST, .start=0x400000, .select=0x400000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x008000, .select=0x408000, .len=4*1024*1024 + * SNES ExHiROM (8MB): + * .flags=CONST, .offset=0, .start=0xC00000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024, .start=0x400000, .select=0xC00000, .len=4*1024*1024 + * .flags=CONST, .offset=0x8000, .start=0x808000, .select=0xC08000, .len=4*1024*1024 + * .flags=CONST, .offset=4*1024*1024+0x8000, .start=0x008000, .select=0xC08000, .len=4*1024*1024 + * Clarify the size of the address space: + * .ptr=NULL, .select=0xFFFFFF + * .len can be implied by .select in many of them, but was included for clarity. + */ + +struct retro_memory_map +{ + const struct retro_memory_descriptor *descriptors; + unsigned num_descriptors; +}; + +struct retro_controller_description +{ + /* Human-readable description of the controller. Even if using a generic + * input device type, this can be set to the particular device type the + * core uses. */ + const char *desc; + + /* Device type passed to retro_set_controller_port_device(). If the device + * type is a sub-class of a generic input device type, use the + * RETRO_DEVICE_SUBCLASS macro to create an ID. + * + * E.g. RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 1). */ + unsigned id; +}; + +struct retro_controller_info +{ + const struct retro_controller_description *types; + unsigned num_types; +}; + +struct retro_subsystem_memory_info +{ + /* The extension associated with a memory type, e.g. "psram". */ + const char *extension; + + /* The memory type for retro_get_memory(). This should be at + * least 0x100 to avoid conflict with standardized + * libretro memory types. */ + unsigned type; +}; + +struct retro_subsystem_rom_info +{ + /* Describes what the content is (SGB BIOS, GB ROM, etc). */ + const char *desc; + + /* Same definition as retro_get_system_info(). */ + const char *valid_extensions; + + /* Same definition as retro_get_system_info(). */ + bool need_fullpath; + + /* Same definition as retro_get_system_info(). */ + bool block_extract; + + /* This is set if the content is required to load a game. + * If this is set to false, a zeroed-out retro_game_info can be passed. */ + bool required; + + /* Content can have multiple associated persistent + * memory types (retro_get_memory()). */ + const struct retro_subsystem_memory_info *memory; + unsigned num_memory; +}; + +struct retro_subsystem_info +{ + /* Human-readable string of the subsystem type, e.g. "Super GameBoy" */ + const char *desc; + + /* A computer friendly short string identifier for the subsystem type. + * This name must be [a-z]. + * E.g. if desc is "Super GameBoy", this can be "sgb". + * This identifier can be used for command-line interfaces, etc. + */ + const char *ident; + + /* Infos for each content file. The first entry is assumed to be the + * "most significant" content for frontend purposes. + * E.g. with Super GameBoy, the first content should be the GameBoy ROM, + * as it is the most "significant" content to a user. + * If a frontend creates new file paths based on the content used + * (e.g. savestates), it should use the path for the first ROM to do so. */ + const struct retro_subsystem_rom_info *roms; + + /* Number of content files associated with a subsystem. */ + unsigned num_roms; + + /* The type passed to retro_load_game_special(). */ + unsigned id; +}; + +typedef void (RETRO_CALLCONV *retro_proc_address_t)(void); + +/* libretro API extension functions: + * (None here so far). + * + * Get a symbol from a libretro core. + * Cores should only return symbols which are actual + * extensions to the libretro API. + * + * Frontends should not use this to obtain symbols to standard + * libretro entry points (static linking or dlsym). + * + * The symbol name must be equal to the function name, + * e.g. if void retro_foo(void); exists, the symbol must be called "retro_foo". + * The returned function pointer must be cast to the corresponding type. + */ +typedef retro_proc_address_t (RETRO_CALLCONV *retro_get_proc_address_t)(const char *sym); + +struct retro_get_proc_address_interface +{ + retro_get_proc_address_t get_proc_address; +}; + +enum retro_log_level +{ + RETRO_LOG_DEBUG = 0, + RETRO_LOG_INFO, + RETRO_LOG_WARN, + RETRO_LOG_ERROR, + + RETRO_LOG_DUMMY = INT_MAX +}; + +/* Logging function. Takes log level argument as well. */ +typedef void (RETRO_CALLCONV *retro_log_printf_t)(enum retro_log_level level, + const char *fmt, ...); + +struct retro_log_callback +{ + retro_log_printf_t log; +}; + +/* Performance related functions */ + +/* ID values for SIMD CPU features */ +#define RETRO_SIMD_SSE (1 << 0) +#define RETRO_SIMD_SSE2 (1 << 1) +#define RETRO_SIMD_VMX (1 << 2) +#define RETRO_SIMD_VMX128 (1 << 3) +#define RETRO_SIMD_AVX (1 << 4) +#define RETRO_SIMD_NEON (1 << 5) +#define RETRO_SIMD_SSE3 (1 << 6) +#define RETRO_SIMD_SSSE3 (1 << 7) +#define RETRO_SIMD_MMX (1 << 8) +#define RETRO_SIMD_MMXEXT (1 << 9) +#define RETRO_SIMD_SSE4 (1 << 10) +#define RETRO_SIMD_SSE42 (1 << 11) +#define RETRO_SIMD_AVX2 (1 << 12) +#define RETRO_SIMD_VFPU (1 << 13) +#define RETRO_SIMD_PS (1 << 14) +#define RETRO_SIMD_AES (1 << 15) +#define RETRO_SIMD_VFPV3 (1 << 16) +#define RETRO_SIMD_VFPV4 (1 << 17) +#define RETRO_SIMD_POPCNT (1 << 18) +#define RETRO_SIMD_MOVBE (1 << 19) +#define RETRO_SIMD_CMOV (1 << 20) +#define RETRO_SIMD_ASIMD (1 << 21) + +typedef uint64_t retro_perf_tick_t; +typedef int64_t retro_time_t; + +struct retro_perf_counter +{ + const char *ident; + retro_perf_tick_t start; + retro_perf_tick_t total; + retro_perf_tick_t call_cnt; + + bool registered; +}; + +/* Returns current time in microseconds. + * Tries to use the most accurate timer available. + */ +typedef retro_time_t (RETRO_CALLCONV *retro_perf_get_time_usec_t)(void); + +/* A simple counter. Usually nanoseconds, but can also be CPU cycles. + * Can be used directly if desired (when creating a more sophisticated + * performance counter system). + * */ +typedef retro_perf_tick_t (RETRO_CALLCONV *retro_perf_get_counter_t)(void); + +/* Returns a bit-mask of detected CPU features (RETRO_SIMD_*). */ +typedef uint64_t (RETRO_CALLCONV *retro_get_cpu_features_t)(void); + +/* Asks frontend to log and/or display the state of performance counters. + * Performance counters can always be poked into manually as well. + */ +typedef void (RETRO_CALLCONV *retro_perf_log_t)(void); + +/* Register a performance counter. + * ident field must be set with a discrete value and other values in + * retro_perf_counter must be 0. + * Registering can be called multiple times. To avoid calling to + * frontend redundantly, you can check registered field first. */ +typedef void (RETRO_CALLCONV *retro_perf_register_t)(struct retro_perf_counter *counter); + +/* Starts a registered counter. */ +typedef void (RETRO_CALLCONV *retro_perf_start_t)(struct retro_perf_counter *counter); + +/* Stops a registered counter. */ +typedef void (RETRO_CALLCONV *retro_perf_stop_t)(struct retro_perf_counter *counter); + +/* For convenience it can be useful to wrap register, start and stop in macros. + * E.g.: + * #ifdef LOG_PERFORMANCE + * #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name)) + * #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name)) + * #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name)) + * #else + * ... Blank macros ... + * #endif + * + * These can then be used mid-functions around code snippets. + * + * extern struct retro_perf_callback perf_cb; * Somewhere in the core. + * + * void do_some_heavy_work(void) + * { + * RETRO_PERFORMANCE_INIT(cb, work_1; + * RETRO_PERFORMANCE_START(cb, work_1); + * heavy_work_1(); + * RETRO_PERFORMANCE_STOP(cb, work_1); + * + * RETRO_PERFORMANCE_INIT(cb, work_2); + * RETRO_PERFORMANCE_START(cb, work_2); + * heavy_work_2(); + * RETRO_PERFORMANCE_STOP(cb, work_2); + * } + * + * void retro_deinit(void) + * { + * perf_cb.perf_log(); * Log all perf counters here for example. + * } + */ + +struct retro_perf_callback +{ + retro_perf_get_time_usec_t get_time_usec; + retro_get_cpu_features_t get_cpu_features; + + retro_perf_get_counter_t get_perf_counter; + retro_perf_register_t perf_register; + retro_perf_start_t perf_start; + retro_perf_stop_t perf_stop; + retro_perf_log_t perf_log; +}; + +/* FIXME: Document the sensor API and work out behavior. + * It will be marked as experimental until then. + */ +enum retro_sensor_action +{ + RETRO_SENSOR_ACCELEROMETER_ENABLE = 0, + RETRO_SENSOR_ACCELEROMETER_DISABLE, + RETRO_SENSOR_GYROSCOPE_ENABLE, + RETRO_SENSOR_GYROSCOPE_DISABLE, + RETRO_SENSOR_ILLUMINANCE_ENABLE, + RETRO_SENSOR_ILLUMINANCE_DISABLE, + + RETRO_SENSOR_DUMMY = INT_MAX +}; + +/* Id values for SENSOR types. */ +#define RETRO_SENSOR_ACCELEROMETER_X 0 +#define RETRO_SENSOR_ACCELEROMETER_Y 1 +#define RETRO_SENSOR_ACCELEROMETER_Z 2 +#define RETRO_SENSOR_GYROSCOPE_X 3 +#define RETRO_SENSOR_GYROSCOPE_Y 4 +#define RETRO_SENSOR_GYROSCOPE_Z 5 +#define RETRO_SENSOR_ILLUMINANCE 6 + +typedef bool (RETRO_CALLCONV *retro_set_sensor_state_t)(unsigned port, + enum retro_sensor_action action, unsigned rate); + +typedef float (RETRO_CALLCONV *retro_sensor_get_input_t)(unsigned port, unsigned id); + +struct retro_sensor_interface +{ + retro_set_sensor_state_t set_sensor_state; + retro_sensor_get_input_t get_sensor_input; +}; + +enum retro_camera_buffer +{ + RETRO_CAMERA_BUFFER_OPENGL_TEXTURE = 0, + RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER, + + RETRO_CAMERA_BUFFER_DUMMY = INT_MAX +}; + +/* Starts the camera driver. Can only be called in retro_run(). */ +typedef bool (RETRO_CALLCONV *retro_camera_start_t)(void); + +/* Stops the camera driver. Can only be called in retro_run(). */ +typedef void (RETRO_CALLCONV *retro_camera_stop_t)(void); + +/* Callback which signals when the camera driver is initialized + * and/or deinitialized. + * retro_camera_start_t can be called in initialized callback. + */ +typedef void (RETRO_CALLCONV *retro_camera_lifetime_status_t)(void); + +/* A callback for raw framebuffer data. buffer points to an XRGB8888 buffer. + * Width, height and pitch are similar to retro_video_refresh_t. + * First pixel is top-left origin. + */ +typedef void (RETRO_CALLCONV *retro_camera_frame_raw_framebuffer_t)(const uint32_t *buffer, + unsigned width, unsigned height, size_t pitch); + +/* A callback for when OpenGL textures are used. + * + * texture_id is a texture owned by camera driver. + * Its state or content should be considered immutable, except for things like + * texture filtering and clamping. + * + * texture_target is the texture target for the GL texture. + * These can include e.g. GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, and possibly + * more depending on extensions. + * + * affine points to a packed 3x3 column-major matrix used to apply an affine + * transform to texture coordinates. (affine_matrix * vec3(coord_x, coord_y, 1.0)) + * After transform, normalized texture coord (0, 0) should be bottom-left + * and (1, 1) should be top-right (or (width, height) for RECTANGLE). + * + * GL-specific typedefs are avoided here to avoid relying on gl.h in + * the API definition. + */ +typedef void (RETRO_CALLCONV *retro_camera_frame_opengl_texture_t)(unsigned texture_id, + unsigned texture_target, const float *affine); + +struct retro_camera_callback +{ + /* Set by libretro core. + * Example bitmask: caps = (1 << RETRO_CAMERA_BUFFER_OPENGL_TEXTURE) | (1 << RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER). + */ + uint64_t caps; + + /* Desired resolution for camera. Is only used as a hint. */ + unsigned width; + unsigned height; + + /* Set by frontend. */ + retro_camera_start_t start; + retro_camera_stop_t stop; + + /* Set by libretro core if raw framebuffer callbacks will be used. */ + retro_camera_frame_raw_framebuffer_t frame_raw_framebuffer; + + /* Set by libretro core if OpenGL texture callbacks will be used. */ + retro_camera_frame_opengl_texture_t frame_opengl_texture; + + /* Set by libretro core. Called after camera driver is initialized and + * ready to be started. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t initialized; + + /* Set by libretro core. Called right before camera driver is + * deinitialized. + * Can be NULL, in which this callback is not called. + */ + retro_camera_lifetime_status_t deinitialized; +}; + +/* Sets the interval of time and/or distance at which to update/poll + * location-based data. + * + * To ensure compatibility with all location-based implementations, + * values for both interval_ms and interval_distance should be provided. + * + * interval_ms is the interval expressed in milliseconds. + * interval_distance is the distance interval expressed in meters. + */ +typedef void (RETRO_CALLCONV *retro_location_set_interval_t)(unsigned interval_ms, + unsigned interval_distance); + +/* Start location services. The device will start listening for changes to the + * current location at regular intervals (which are defined with + * retro_location_set_interval_t). */ +typedef bool (RETRO_CALLCONV *retro_location_start_t)(void); + +/* Stop location services. The device will stop listening for changes + * to the current location. */ +typedef void (RETRO_CALLCONV *retro_location_stop_t)(void); + +/* Get the position of the current location. Will set parameters to + * 0 if no new location update has happened since the last time. */ +typedef bool (RETRO_CALLCONV *retro_location_get_position_t)(double *lat, double *lon, + double *horiz_accuracy, double *vert_accuracy); + +/* Callback which signals when the location driver is initialized + * and/or deinitialized. + * retro_location_start_t can be called in initialized callback. + */ +typedef void (RETRO_CALLCONV *retro_location_lifetime_status_t)(void); + +struct retro_location_callback +{ + retro_location_start_t start; + retro_location_stop_t stop; + retro_location_get_position_t get_position; + retro_location_set_interval_t set_interval; + + retro_location_lifetime_status_t initialized; + retro_location_lifetime_status_t deinitialized; +}; + +enum retro_rumble_effect +{ + RETRO_RUMBLE_STRONG = 0, + RETRO_RUMBLE_WEAK = 1, + + RETRO_RUMBLE_DUMMY = INT_MAX +}; + +/* Sets rumble state for joypad plugged in port 'port'. + * Rumble effects are controlled independently, + * and setting e.g. strong rumble does not override weak rumble. + * Strength has a range of [0, 0xffff]. + * + * Returns true if rumble state request was honored. + * Calling this before first retro_run() is likely to return false. */ +typedef bool (RETRO_CALLCONV *retro_set_rumble_state_t)(unsigned port, + enum retro_rumble_effect effect, uint16_t strength); + +struct retro_rumble_interface +{ + retro_set_rumble_state_t set_rumble_state; +}; + +/* Notifies libretro that audio data should be written. */ +typedef void (RETRO_CALLCONV *retro_audio_callback_t)(void); + +/* True: Audio driver in frontend is active, and callback is + * expected to be called regularily. + * False: Audio driver in frontend is paused or inactive. + * Audio callback will not be called until set_state has been + * called with true. + * Initial state is false (inactive). + */ +typedef void (RETRO_CALLCONV *retro_audio_set_state_callback_t)(bool enabled); + +struct retro_audio_callback +{ + retro_audio_callback_t callback; + retro_audio_set_state_callback_t set_state; +}; + +/* Notifies a libretro core of time spent since last invocation + * of retro_run() in microseconds. + * + * It will be called right before retro_run() every frame. + * The frontend can tamper with timing to support cases like + * fast-forward, slow-motion and framestepping. + * + * In those scenarios the reference frame time value will be used. */ +typedef int64_t retro_usec_t; +typedef void (RETRO_CALLCONV *retro_frame_time_callback_t)(retro_usec_t usec); +struct retro_frame_time_callback +{ + retro_frame_time_callback_t callback; + /* Represents the time of one frame. It is computed as + * 1000000 / fps, but the implementation will resolve the + * rounding to ensure that framestepping, etc is exact. */ + retro_usec_t reference; +}; + +/* Notifies a libretro core of the current occupancy + * level of the frontend audio buffer. + * + * - active: 'true' if audio buffer is currently + * in use. Will be 'false' if audio is + * disabled in the frontend + * + * - occupancy: Given as a value in the range [0,100], + * corresponding to the occupancy percentage + * of the audio buffer + * + * - underrun_likely: 'true' if the frontend expects an + * audio buffer underrun during the + * next frame (indicates that a core + * should attempt frame skipping) + * + * It will be called right before retro_run() every frame. */ +typedef void (RETRO_CALLCONV *retro_audio_buffer_status_callback_t)( + bool active, unsigned occupancy, bool underrun_likely); +struct retro_audio_buffer_status_callback +{ + retro_audio_buffer_status_callback_t callback; +}; + +/* Pass this to retro_video_refresh_t if rendering to hardware. + * Passing NULL to retro_video_refresh_t is still a frame dupe as normal. + * */ +#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) + +/* Invalidates the current HW context. + * Any GL state is lost, and must not be deinitialized explicitly. + * If explicit deinitialization is desired by the libretro core, + * it should implement context_destroy callback. + * If called, all GPU resources must be reinitialized. + * Usually called when frontend reinits video driver. + * Also called first time video driver is initialized, + * allowing libretro core to initialize resources. + */ +typedef void (RETRO_CALLCONV *retro_hw_context_reset_t)(void); + +/* Gets current framebuffer which is to be rendered to. + * Could change every frame potentially. + */ +typedef uintptr_t (RETRO_CALLCONV *retro_hw_get_current_framebuffer_t)(void); + +/* Get a symbol from HW context. */ +typedef retro_proc_address_t (RETRO_CALLCONV *retro_hw_get_proc_address_t)(const char *sym); + +enum retro_hw_context_type +{ + RETRO_HW_CONTEXT_NONE = 0, + /* OpenGL 2.x. Driver can choose to use latest compatibility context. */ + RETRO_HW_CONTEXT_OPENGL = 1, + /* OpenGL ES 2.0. */ + RETRO_HW_CONTEXT_OPENGLES2 = 2, + /* Modern desktop core GL context. Use version_major/ + * version_minor fields to set GL version. */ + RETRO_HW_CONTEXT_OPENGL_CORE = 3, + /* OpenGL ES 3.0 */ + RETRO_HW_CONTEXT_OPENGLES3 = 4, + /* OpenGL ES 3.1+. Set version_major/version_minor. For GLES2 and GLES3, + * use the corresponding enums directly. */ + RETRO_HW_CONTEXT_OPENGLES_VERSION = 5, + + /* Vulkan, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE. */ + RETRO_HW_CONTEXT_VULKAN = 6, + + /* Direct3D11, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D11 = 7, + + /* Direct3D10, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D10 = 8, + + /* Direct3D12, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D12 = 9, + + /* Direct3D9, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE */ + RETRO_HW_CONTEXT_D3D9 = 10, + + RETRO_HW_CONTEXT_DUMMY = INT_MAX +}; + +struct retro_hw_render_callback +{ + /* Which API to use. Set by libretro core. */ + enum retro_hw_context_type context_type; + + /* Called when a context has been created or when it has been reset. + * An OpenGL context is only valid after context_reset() has been called. + * + * When context_reset is called, OpenGL resources in the libretro + * implementation are guaranteed to be invalid. + * + * It is possible that context_reset is called multiple times during an + * application lifecycle. + * If context_reset is called without any notification (context_destroy), + * the OpenGL context was lost and resources should just be recreated + * without any attempt to "free" old resources. + */ + retro_hw_context_reset_t context_reset; + + /* Set by frontend. + * TODO: This is rather obsolete. The frontend should not + * be providing preallocated framebuffers. */ + retro_hw_get_current_framebuffer_t get_current_framebuffer; + + /* Set by frontend. + * Can return all relevant functions, including glClear on Windows. */ + retro_hw_get_proc_address_t get_proc_address; + + /* Set if render buffers should have depth component attached. + * TODO: Obsolete. */ + bool depth; + + /* Set if stencil buffers should be attached. + * TODO: Obsolete. */ + bool stencil; + + /* If depth and stencil are true, a packed 24/8 buffer will be added. + * Only attaching stencil is invalid and will be ignored. */ + + /* Use conventional bottom-left origin convention. If false, + * standard libretro top-left origin semantics are used. + * TODO: Move to GL specific interface. */ + bool bottom_left_origin; + + /* Major version number for core GL context or GLES 3.1+. */ + unsigned version_major; + + /* Minor version number for core GL context or GLES 3.1+. */ + unsigned version_minor; + + /* If this is true, the frontend will go very far to avoid + * resetting context in scenarios like toggling fullscreen, etc. + * TODO: Obsolete? Maybe frontend should just always assume this ... + */ + bool cache_context; + + /* The reset callback might still be called in extreme situations + * such as if the context is lost beyond recovery. + * + * For optimal stability, set this to false, and allow context to be + * reset at any time. + */ + + /* A callback to be called before the context is destroyed in a + * controlled way by the frontend. */ + retro_hw_context_reset_t context_destroy; + + /* OpenGL resources can be deinitialized cleanly at this step. + * context_destroy can be set to NULL, in which resources will + * just be destroyed without any notification. + * + * Even when context_destroy is non-NULL, it is possible that + * context_reset is called without any destroy notification. + * This happens if context is lost by external factors (such as + * notified by GL_ARB_robustness). + * + * In this case, the context is assumed to be already dead, + * and the libretro implementation must not try to free any OpenGL + * resources in the subsequent context_reset. + */ + + /* Creates a debug context. */ + bool debug_context; +}; + +/* Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. + * Called by the frontend in response to keyboard events. + * down is set if the key is being pressed, or false if it is being released. + * keycode is the RETROK value of the char. + * character is the text character of the pressed key. (UTF-32). + * key_modifiers is a set of RETROKMOD values or'ed together. + * + * The pressed/keycode state can be indepedent of the character. + * It is also possible that multiple characters are generated from a + * single keypress. + * Keycode events should be treated separately from character events. + * However, when possible, the frontend should try to synchronize these. + * If only a character is posted, keycode should be RETROK_UNKNOWN. + * + * Similarily if only a keycode event is generated with no corresponding + * character, character should be 0. + */ +typedef void (RETRO_CALLCONV *retro_keyboard_event_t)(bool down, unsigned keycode, + uint32_t character, uint16_t key_modifiers); + +struct retro_keyboard_callback +{ + retro_keyboard_event_t callback; +}; + +/* Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE & + * RETRO_ENVIRONMENT_SET_DISK_CONTROL_EXT_INTERFACE. + * Should be set for implementations which can swap out multiple disk + * images in runtime. + * + * If the implementation can do this automatically, it should strive to do so. + * However, there are cases where the user must manually do so. + * + * Overview: To swap a disk image, eject the disk image with + * set_eject_state(true). + * Set the disk index with set_image_index(index). Insert the disk again + * with set_eject_state(false). + */ + +/* If ejected is true, "ejects" the virtual disk tray. + * When ejected, the disk image index can be set. + */ +typedef bool (RETRO_CALLCONV *retro_set_eject_state_t)(bool ejected); + +/* Gets current eject state. The initial state is 'not ejected'. */ +typedef bool (RETRO_CALLCONV *retro_get_eject_state_t)(void); + +/* Gets current disk index. First disk is index 0. + * If return value is >= get_num_images(), no disk is currently inserted. + */ +typedef unsigned (RETRO_CALLCONV *retro_get_image_index_t)(void); + +/* Sets image index. Can only be called when disk is ejected. + * The implementation supports setting "no disk" by using an + * index >= get_num_images(). + */ +typedef bool (RETRO_CALLCONV *retro_set_image_index_t)(unsigned index); + +/* Gets total number of images which are available to use. */ +typedef unsigned (RETRO_CALLCONV *retro_get_num_images_t)(void); + +struct retro_game_info; + +/* Replaces the disk image associated with index. + * Arguments to pass in info have same requirements as retro_load_game(). + * Virtual disk tray must be ejected when calling this. + * + * Replacing a disk image with info = NULL will remove the disk image + * from the internal list. + * As a result, calls to get_image_index() can change. + * + * E.g. replace_image_index(1, NULL), and previous get_image_index() + * returned 4 before. + * Index 1 will be removed, and the new index is 3. + */ +typedef bool (RETRO_CALLCONV *retro_replace_image_index_t)(unsigned index, + const struct retro_game_info *info); + +/* Adds a new valid index (get_num_images()) to the internal disk list. + * This will increment subsequent return values from get_num_images() by 1. + * This image index cannot be used until a disk image has been set + * with replace_image_index. */ +typedef bool (RETRO_CALLCONV *retro_add_image_index_t)(void); + +/* Sets initial image to insert in drive when calling + * core_load_game(). + * Since we cannot pass the initial index when loading + * content (this would require a major API change), this + * is set by the frontend *before* calling the core's + * retro_load_game()/retro_load_game_special() implementation. + * A core should therefore cache the index/path values and handle + * them inside retro_load_game()/retro_load_game_special(). + * - If 'index' is invalid (index >= get_num_images()), the + * core should ignore the set value and instead use 0 + * - 'path' is used purely for error checking - i.e. when + * content is loaded, the core should verify that the + * disk specified by 'index' has the specified file path. + * This is to guard against auto selecting the wrong image + * if (for example) the user should modify an existing M3U + * playlist. We have to let the core handle this because + * set_initial_image() must be called before loading content, + * i.e. the frontend cannot access image paths in advance + * and thus cannot perform the error check itself. + * If set path and content path do not match, the core should + * ignore the set 'index' value and instead use 0 + * Returns 'false' if index or 'path' are invalid, or core + * does not support this functionality + */ +typedef bool (RETRO_CALLCONV *retro_set_initial_image_t)(unsigned index, const char *path); + +/* Fetches the path of the specified disk image file. + * Returns 'false' if index is invalid (index >= get_num_images()) + * or path is otherwise unavailable. + */ +typedef bool (RETRO_CALLCONV *retro_get_image_path_t)(unsigned index, char *path, size_t len); + +/* Fetches a core-provided 'label' for the specified disk + * image file. In the simplest case this may be a file name + * (without extension), but for cores with more complex + * content requirements information may be provided to + * facilitate user disk swapping - for example, a core + * running floppy-disk-based content may uniquely label + * save disks, data disks, level disks, etc. with names + * corresponding to in-game disk change prompts (so the + * frontend can provide better user guidance than a 'dumb' + * disk index value). + * Returns 'false' if index is invalid (index >= get_num_images()) + * or label is otherwise unavailable. + */ +typedef bool (RETRO_CALLCONV *retro_get_image_label_t)(unsigned index, char *label, size_t len); + +struct retro_disk_control_callback +{ + retro_set_eject_state_t set_eject_state; + retro_get_eject_state_t get_eject_state; + + retro_get_image_index_t get_image_index; + retro_set_image_index_t set_image_index; + retro_get_num_images_t get_num_images; + + retro_replace_image_index_t replace_image_index; + retro_add_image_index_t add_image_index; +}; + +struct retro_disk_control_ext_callback +{ + retro_set_eject_state_t set_eject_state; + retro_get_eject_state_t get_eject_state; + + retro_get_image_index_t get_image_index; + retro_set_image_index_t set_image_index; + retro_get_num_images_t get_num_images; + + retro_replace_image_index_t replace_image_index; + retro_add_image_index_t add_image_index; + + /* NOTE: Frontend will only attempt to record/restore + * last used disk index if both set_initial_image() + * and get_image_path() are implemented */ + retro_set_initial_image_t set_initial_image; /* Optional - may be NULL */ + + retro_get_image_path_t get_image_path; /* Optional - may be NULL */ + retro_get_image_label_t get_image_label; /* Optional - may be NULL */ +}; + +/* Definitions for RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE. + * A core can set it if sending and receiving custom network packets + * during a multiplayer session is desired. + */ + +/* Netpacket flags for retro_netpacket_send_t */ +#define RETRO_NETPACKET_UNRELIABLE 0 /* Packet to be sent unreliable, depending on network quality it might not arrive. */ +#define RETRO_NETPACKET_RELIABLE (1 << 0) /* Reliable packets are guaranteed to arrive at the target in the order they were send. */ +#define RETRO_NETPACKET_UNSEQUENCED (1 << 1) /* Packet will not be sequenced with other packets and may arrive out of order. Cannot be set on reliable packets. */ + +/* Used by the core to send a packet to one or more connected players. + * A single packet sent via this interface can contain up to 64 KB of data. + * + * The broadcast flag can be set to true to send to multiple connected clients. + * In a broadcast, the client_id argument indicates 1 client NOT to send the + * packet to (pass 0xFFFF to send to everyone). Otherwise, the client_id + * argument indicates a single client to send the packet to. + * + * A frontend must support sending reliable packets (RETRO_NETPACKET_RELIABLE). + * Unreliable packets might not be supported by the frontend, but the flags can + * still be specified. Reliable transmission will be used instead. + * + * If this function is called passing NULL for buf, it will instead flush all + * previously buffered outgoing packets and instantly read any incoming packets. + * During such a call, retro_netpacket_receive_t and retro_netpacket_stop_t can + * be called. The core can perform this in a loop to do a blocking read, i.e., + * wait for incoming data, but needs to handle stop getting called and also + * give up after a short while to avoid freezing on a connection problem. + * + * This function is not guaranteed to be thread-safe and must be called during + * retro_run or any of the netpacket callbacks passed with this interface. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_send_t)(int flags, const void* buf, size_t len, uint16_t client_id, bool broadcast); + +/* Called by the frontend to signify that a multiplayer session has started. + * If client_id is 0 the local player is the host of the session and at this + * point no other player has connected yet. + * + * If client_id is > 0 the local player is a client connected to a host and + * at this point is already fully connected to the host. + * + * The core must store the retro_netpacket_send_t function pointer provided + * here and use it whenever it wants to send a packet. This function pointer + * remains valid until the frontend calls retro_netpacket_stop_t. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_start_t)(uint16_t client_id, retro_netpacket_send_t send_fn); + +/* Called by the frontend when a new packet arrives which has been sent from + * another player with retro_netpacket_send_t. The client_id argument indicates + * who has sent the packet. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_receive_t)(const void* buf, size_t len, uint16_t client_id); + +/* Called by the frontend when the multiplayer session has ended. + * Once this gets called the retro_netpacket_send_t function pointer passed + * to retro_netpacket_start_t will not be valid anymore. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_stop_t)(void); + +/* Called by the frontend every frame (between calls to retro_run while + * updating the state of the multiplayer session. + * This is a good place for the core to call retro_netpacket_send_t from. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_poll_t)(void); + +/* Called by the frontend when a new player connects to the hosted session. + * This is only called on the host side, not for clients connected to the host. + * If this function returns false, the newly connected player gets dropped. + * This can be used for example to limit the number of players. + */ +typedef bool (RETRO_CALLCONV *retro_netpacket_connected_t)(uint16_t client_id); + +/* Called by the frontend when a player leaves or disconnects from the hosted session. + * This is only called on the host side, not for clients connected to the host. + */ +typedef void (RETRO_CALLCONV *retro_netpacket_disconnected_t)(uint16_t client_id); + +/** + * A callback interface for giving a core the ability to send and receive custom + * network packets during a multiplayer session between two or more instances + * of a libretro frontend. + * + * @see RETRO_ENVIRONMENT_SET_NETPACKET_INTERFACE + */ +struct retro_netpacket_callback +{ + retro_netpacket_start_t start; + retro_netpacket_receive_t receive; + retro_netpacket_stop_t stop; /* Optional - may be NULL */ + retro_netpacket_poll_t poll; /* Optional - may be NULL */ + retro_netpacket_connected_t connected; /* Optional - may be NULL */ + retro_netpacket_disconnected_t disconnected; /* Optional - may be NULL */ +}; + +enum retro_pixel_format +{ + /* 0RGB1555, native endian. + * 0 bit must be set to 0. + * This pixel format is default for compatibility concerns only. + * If a 15/16-bit pixel format is desired, consider using RGB565. */ + RETRO_PIXEL_FORMAT_0RGB1555 = 0, + + /* XRGB8888, native endian. + * X bits are ignored. */ + RETRO_PIXEL_FORMAT_XRGB8888 = 1, + + /* RGB565, native endian. + * This pixel format is the recommended format to use if a 15/16-bit + * format is desired as it is the pixel format that is typically + * available on a wide range of low-power devices. + * + * It is also natively supported in APIs like OpenGL ES. */ + RETRO_PIXEL_FORMAT_RGB565 = 2, + + /* Ensure sizeof() == sizeof(int). */ + RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX +}; + +enum retro_savestate_context +{ + /* Standard savestate written to disk. */ + RETRO_SAVESTATE_CONTEXT_NORMAL = 0, + + /* Savestate where you are guaranteed that the same instance will load the save state. + * You can store internal pointers to code or data. + * It's still a full serialization and deserialization, and could be loaded or saved at any time. + * It won't be written to disk or sent over the network. + */ + RETRO_SAVESTATE_CONTEXT_RUNAHEAD_SAME_INSTANCE = 1, + + /* Savestate where you are guaranteed that the same emulator binary will load that savestate. + * You can skip anything that would slow down saving or loading state but you can not store internal pointers. + * It won't be written to disk or sent over the network. + * Example: "Second Instance" runahead + */ + RETRO_SAVESTATE_CONTEXT_RUNAHEAD_SAME_BINARY = 2, + + /* Savestate used within a rollback netplay feature. + * You should skip anything that would unnecessarily increase bandwidth usage. + * It won't be written to disk but it will be sent over the network. + */ + RETRO_SAVESTATE_CONTEXT_ROLLBACK_NETPLAY = 3, + + /* Ensure sizeof() == sizeof(int). */ + RETRO_SAVESTATE_CONTEXT_UNKNOWN = INT_MAX +}; + +struct retro_message +{ + const char *msg; /* Message to be displayed. */ + unsigned frames; /* Duration in frames of message. */ +}; + +enum retro_message_target +{ + RETRO_MESSAGE_TARGET_ALL = 0, + RETRO_MESSAGE_TARGET_OSD, + RETRO_MESSAGE_TARGET_LOG +}; + +enum retro_message_type +{ + RETRO_MESSAGE_TYPE_NOTIFICATION = 0, + RETRO_MESSAGE_TYPE_NOTIFICATION_ALT, + RETRO_MESSAGE_TYPE_STATUS, + RETRO_MESSAGE_TYPE_PROGRESS +}; + +struct retro_message_ext +{ + /* Message string to be displayed/logged */ + const char *msg; + /* Duration (in ms) of message when targeting the OSD */ + unsigned duration; + /* Message priority when targeting the OSD + * > When multiple concurrent messages are sent to + * the frontend and the frontend does not have the + * capacity to display them all, messages with the + * *highest* priority value should be shown + * > There is no upper limit to a message priority + * value (within the bounds of the unsigned data type) + * > In the reference frontend (RetroArch), the same + * priority values are used for frontend-generated + * notifications, which are typically assigned values + * between 0 and 3 depending upon importance */ + unsigned priority; + /* Message logging level (info, warn, error, etc.) */ + enum retro_log_level level; + /* Message destination: OSD, logging interface or both */ + enum retro_message_target target; + /* Message 'type' when targeting the OSD + * > RETRO_MESSAGE_TYPE_NOTIFICATION: Specifies that a + * message should be handled in identical fashion to + * a standard frontend-generated notification + * > RETRO_MESSAGE_TYPE_NOTIFICATION_ALT: Specifies that + * message is a notification that requires user attention + * or action, but that it should be displayed in a manner + * that differs from standard frontend-generated notifications. + * This would typically correspond to messages that should be + * displayed immediately (independently from any internal + * frontend message queue), and/or which should be visually + * distinguishable from frontend-generated notifications. + * For example, a core may wish to inform the user of + * information related to a disk-change event. It is + * expected that the frontend itself may provide a + * notification in this case; if the core sends a + * message of type RETRO_MESSAGE_TYPE_NOTIFICATION, an + * uncomfortable 'double-notification' may occur. A message + * of RETRO_MESSAGE_TYPE_NOTIFICATION_ALT should therefore + * be presented such that visual conflict with regular + * notifications does not occur + * > RETRO_MESSAGE_TYPE_STATUS: Indicates that message + * is not a standard notification. This typically + * corresponds to 'status' indicators, such as a core's + * internal FPS, which are intended to be displayed + * either permanently while a core is running, or in + * a manner that does not suggest user attention or action + * is required. 'Status' type messages should therefore be + * displayed in a different on-screen location and in a manner + * easily distinguishable from both standard frontend-generated + * notifications and messages of type RETRO_MESSAGE_TYPE_NOTIFICATION_ALT + * > RETRO_MESSAGE_TYPE_PROGRESS: Indicates that message reports + * the progress of an internal core task. For example, in cases + * where a core itself handles the loading of content from a file, + * this may correspond to the percentage of the file that has been + * read. Alternatively, an audio/video playback core may use a + * message of type RETRO_MESSAGE_TYPE_PROGRESS to display the current + * playback position as a percentage of the runtime. 'Progress' type + * messages should therefore be displayed as a literal progress bar, + * where: + * - 'retro_message_ext.msg' is the progress bar title/label + * - 'retro_message_ext.progress' determines the length of + * the progress bar + * NOTE: Message type is a *hint*, and may be ignored + * by the frontend. If a frontend lacks support for + * displaying messages via alternate means than standard + * frontend-generated notifications, it will treat *all* + * messages as having the type RETRO_MESSAGE_TYPE_NOTIFICATION */ + enum retro_message_type type; + /* Task progress when targeting the OSD and message is + * of type RETRO_MESSAGE_TYPE_PROGRESS + * > -1: Unmetered/indeterminate + * > 0-100: Current progress percentage + * NOTE: Since message type is a hint, a frontend may ignore + * progress values. Where relevant, a core should therefore + * include progress percentage within the message string, + * such that the message intent remains clear when displayed + * as a standard frontend-generated notification */ + int8_t progress; +}; + +/* Describes how the libretro implementation maps a libretro input bind + * to its internal input system through a human readable string. + * This string can be used to better let a user configure input. */ +struct retro_input_descriptor +{ + /* Associates given parameters with a description. */ + unsigned port; + unsigned device; + unsigned index; + unsigned id; + + /* Human readable description for parameters. + * The pointer must remain valid until + * retro_unload_game() is called. */ + const char *description; +}; + +struct retro_system_info +{ + /* All pointers are owned by libretro implementation, and pointers must + * remain valid until it is unloaded. */ + + const char *library_name; /* Descriptive name of library. Should not + * contain any version numbers, etc. */ + const char *library_version; /* Descriptive version of core. */ + + const char *valid_extensions; /* A string listing probably content + * extensions the core will be able to + * load, separated with pipe. + * I.e. "bin|rom|iso". + * Typically used for a GUI to filter + * out extensions. */ + + /* Libretro cores that need to have direct access to their content + * files, including cores which use the path of the content files to + * determine the paths of other files, should set need_fullpath to true. + * + * Cores should strive for setting need_fullpath to false, + * as it allows the frontend to perform patching, etc. + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info::path is guaranteed to have a valid path + * - retro_game_info::data and retro_game_info::size are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - retro_game_info::path may be NULL + * - retro_game_info::data and retro_game_info::size are guaranteed + * to be valid + * + * See also: + * - RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY + * - RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY + */ + bool need_fullpath; + + /* If true, the frontend is not allowed to extract any archives before + * loading the real content. + * Necessary for certain libretro implementations that load games + * from zipped archives. */ + bool block_extract; +}; + +/* Defines overrides which modify frontend handling of + * specific content file types. + * An array of retro_system_content_info_override is + * passed to RETRO_ENVIRONMENT_SET_CONTENT_INFO_OVERRIDE + * NOTE: In the following descriptions, references to + * retro_load_game() may be replaced with + * retro_load_game_special() */ +struct retro_system_content_info_override +{ + /* A list of file extensions for which the override + * should apply, delimited by a 'pipe' character + * (e.g. "md|sms|gg") + * Permitted file extensions are limited to those + * included in retro_system_info::valid_extensions + * and/or retro_subsystem_rom_info::valid_extensions */ + const char *extensions; + + /* Overrides the need_fullpath value set in + * retro_system_info and/or retro_subsystem_rom_info. + * To reiterate: + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info::path is guaranteed to contain a valid + * path to an existent file + * - retro_game_info::data and retro_game_info::size are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - retro_game_info::path may be NULL + * - retro_game_info::data and retro_game_info::size are guaranteed + * to be valid + * + * In addition: + * + * If need_fullpath is true and retro_load_game() is called: + * - retro_game_info_ext::full_path is guaranteed to contain a valid + * path to an existent file + * - retro_game_info_ext::archive_path may be NULL + * - retro_game_info_ext::archive_file may be NULL + * - retro_game_info_ext::dir is guaranteed to contain a valid path + * to the directory in which the content file exists + * - retro_game_info_ext::name is guaranteed to contain the + * basename of the content file, without extension + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file in lower case format + * - retro_game_info_ext::data and retro_game_info_ext::size + * are invalid + * + * If need_fullpath is false and retro_load_game() is called: + * - If retro_game_info_ext::file_in_archive is false: + * - retro_game_info_ext::full_path is guaranteed to contain + * a valid path to an existent file + * - retro_game_info_ext::archive_path may be NULL + * - retro_game_info_ext::archive_file may be NULL + * - retro_game_info_ext::dir is guaranteed to contain a + * valid path to the directory in which the content file exists + * - retro_game_info_ext::name is guaranteed to contain the + * basename of the content file, without extension + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file in lower case format + * - If retro_game_info_ext::file_in_archive is true: + * - retro_game_info_ext::full_path may be NULL + * - retro_game_info_ext::archive_path is guaranteed to + * contain a valid path to an existent compressed file + * inside which the content file is located + * - retro_game_info_ext::archive_file is guaranteed to + * contain a valid path to an existent content file + * inside the compressed file referred to by + * retro_game_info_ext::archive_path + * e.g. for a compressed file '/path/to/foo.zip' + * containing 'bar.sfc' + * > retro_game_info_ext::archive_path will be '/path/to/foo.zip' + * > retro_game_info_ext::archive_file will be 'bar.sfc' + * - retro_game_info_ext::dir is guaranteed to contain a + * valid path to the directory in which the compressed file + * (containing the content file) exists + * - retro_game_info_ext::name is guaranteed to contain + * EITHER + * 1) the basename of the compressed file (containing + * the content file), without extension + * OR + * 2) the basename of the content file inside the + * compressed file, without extension + * In either case, a core should consider 'name' to + * be the canonical name/ID of the the content file + * - retro_game_info_ext::ext is guaranteed to contain the + * extension of the content file inside the compressed file, + * in lower case format + * - retro_game_info_ext::data and retro_game_info_ext::size are + * guaranteed to be valid */ + bool need_fullpath; + + /* If need_fullpath is false, specifies whether the content + * data buffer available in retro_load_game() is 'persistent' + * + * If persistent_data is false and retro_load_game() is called: + * - retro_game_info::data and retro_game_info::size + * are valid only until retro_load_game() returns + * - retro_game_info_ext::data and retro_game_info_ext::size + * are valid only until retro_load_game() returns + * + * If persistent_data is true and retro_load_game() is called: + * - retro_game_info::data and retro_game_info::size + * are valid until retro_deinit() returns + * - retro_game_info_ext::data and retro_game_info_ext::size + * are valid until retro_deinit() returns */ + bool persistent_data; +}; + +/* Similar to retro_game_info, but provides extended + * information about the source content file and + * game memory buffer status. + * And array of retro_game_info_ext is returned by + * RETRO_ENVIRONMENT_GET_GAME_INFO_EXT + * NOTE: In the following descriptions, references to + * retro_load_game() may be replaced with + * retro_load_game_special() */ +struct retro_game_info_ext +{ + /* - If file_in_archive is false, contains a valid + * path to an existent content file (UTF-8 encoded) + * - If file_in_archive is true, may be NULL */ + const char *full_path; + + /* - If file_in_archive is false, may be NULL + * - If file_in_archive is true, contains a valid path + * to an existent compressed file inside which the + * content file is located (UTF-8 encoded) */ + const char *archive_path; + + /* - If file_in_archive is false, may be NULL + * - If file_in_archive is true, contain a valid path + * to an existent content file inside the compressed + * file referred to by archive_path (UTF-8 encoded) + * e.g. for a compressed file '/path/to/foo.zip' + * containing 'bar.sfc' + * > archive_path will be '/path/to/foo.zip' + * > archive_file will be 'bar.sfc' */ + const char *archive_file; + + /* - If file_in_archive is false, contains a valid path + * to the directory in which the content file exists + * (UTF-8 encoded) + * - If file_in_archive is true, contains a valid path + * to the directory in which the compressed file + * (containing the content file) exists (UTF-8 encoded) */ + const char *dir; + + /* Contains the canonical name/ID of the content file + * (UTF-8 encoded). Intended for use when identifying + * 'complementary' content named after the loaded file - + * i.e. companion data of a different format (a CD image + * required by a ROM), texture packs, internally handled + * save files, etc. + * - If file_in_archive is false, contains the basename + * of the content file, without extension + * - If file_in_archive is true, then string is + * implementation specific. A frontend may choose to + * set a name value of: + * EITHER + * 1) the basename of the compressed file (containing + * the content file), without extension + * OR + * 2) the basename of the content file inside the + * compressed file, without extension + * RetroArch sets the 'name' value according to (1). + * A frontend that supports routine loading of + * content from archives containing multiple unrelated + * content files may set the 'name' value according + * to (2). */ + const char *name; + + /* - If file_in_archive is false, contains the extension + * of the content file in lower case format + * - If file_in_archive is true, contains the extension + * of the content file inside the compressed file, + * in lower case format */ + const char *ext; + + /* String of implementation specific meta-data. */ + const char *meta; + + /* Memory buffer of loaded game content. Will be NULL: + * IF + * - retro_system_info::need_fullpath is true and + * retro_system_content_info_override::need_fullpath + * is unset + * OR + * - retro_system_content_info_override::need_fullpath + * is true */ + const void *data; + + /* Size of game content memory buffer, in bytes */ + size_t size; + + /* True if loaded content file is inside a compressed + * archive */ + bool file_in_archive; + + /* - If data is NULL, value is unset/ignored + * - If data is non-NULL: + * - If persistent_data is false, data and size are + * valid only until retro_load_game() returns + * - If persistent_data is true, data and size are + * are valid until retro_deinit() returns */ + bool persistent_data; +}; + +struct retro_game_geometry +{ + unsigned base_width; /* Nominal video width of game. */ + unsigned base_height; /* Nominal video height of game. */ + unsigned max_width; /* Maximum possible width of game. */ + unsigned max_height; /* Maximum possible height of game. */ + + float aspect_ratio; /* Nominal aspect ratio of game. If + * aspect_ratio is <= 0.0, an aspect ratio + * of base_width / base_height is assumed. + * A frontend could override this setting, + * if desired. */ +}; + +struct retro_system_timing +{ + double fps; /* FPS of video content. */ + double sample_rate; /* Sampling rate of audio. */ +}; + +struct retro_system_av_info +{ + struct retro_game_geometry geometry; + struct retro_system_timing timing; +}; + +struct retro_variable +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. + * If NULL, obtains the complete environment string if more + * complex parsing is necessary. + * The environment string is formatted as key-value pairs + * delimited by semicolons as so: + * "key1=value1;key2=value2;..." + */ + const char *key; + + /* Value to be obtained. If key does not exist, it is set to NULL. */ + const char *value; +}; + +struct retro_core_option_display +{ + /* Variable to configure in RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY */ + const char *key; + + /* Specifies whether variable should be displayed + * when presenting core options to the user */ + bool visible; +}; + +/* Maximum number of values permitted for a core option + * > Note: We have to set a maximum value due the limitations + * of the C language - i.e. it is not possible to create an + * array of structs each containing a variable sized array, + * so the retro_core_option_definition values array must + * have a fixed size. The size limit of 128 is a balancing + * act - it needs to be large enough to support all 'sane' + * core options, but setting it too large may impact low memory + * platforms. In practise, if a core option has more than + * 128 values then the implementation is likely flawed. + * To quote the above API reference: + * "The number of possible options should be very limited + * i.e. it should be feasible to cycle through options + * without a keyboard." + */ +#define RETRO_NUM_CORE_OPTION_VALUES_MAX 128 + +struct retro_core_option_value +{ + /* Expected option value */ + const char *value; + + /* Human-readable value label. If NULL, value itself + * will be displayed by the frontend */ + const char *label; +}; + +struct retro_core_option_definition +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. */ + const char *key; + + /* Human-readable core option description (used as menu label) */ + const char *desc; + + /* Human-readable core option information (used as menu sublabel) */ + const char *info; + + /* Array of retro_core_option_value structs, terminated by NULL */ + struct retro_core_option_value values[RETRO_NUM_CORE_OPTION_VALUES_MAX]; + + /* Default core option value. Must match one of the values + * in the retro_core_option_value array, otherwise will be + * ignored */ + const char *default_value; +}; + +#ifdef __PS3__ +#undef local +#endif + +struct retro_core_options_intl +{ + /* Pointer to an array of retro_core_option_definition structs + * - US English implementation + * - Must point to a valid array */ + struct retro_core_option_definition *us; + + /* Pointer to an array of retro_core_option_definition structs + * - Implementation for current frontend language + * - May be NULL */ + struct retro_core_option_definition *local; +}; + +struct retro_core_option_v2_category +{ + /* Variable uniquely identifying the + * option category. Valid key characters + * are [a-z, A-Z, 0-9, _, -] */ + const char *key; + + /* Human-readable category description + * > Used as category menu label when + * frontend has core option category + * support */ + const char *desc; + + /* Human-readable category information + * > Used as category menu sublabel when + * frontend has core option category + * support + * > Optional (may be NULL or an empty + * string) */ + const char *info; +}; + +struct retro_core_option_v2_definition +{ + /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. + * Valid key characters are [a-z, A-Z, 0-9, _, -] */ + const char *key; + + /* Human-readable core option description + * > Used as menu label when frontend does + * not have core option category support + * e.g. "Video > Aspect Ratio" */ + const char *desc; + + /* Human-readable core option description + * > Used as menu label when frontend has + * core option category support + * e.g. "Aspect Ratio", where associated + * retro_core_option_v2_category::desc + * is "Video" + * > If empty or NULL, the string specified by + * desc will be used as the menu label + * > Will be ignored (and may be set to NULL) + * if category_key is empty or NULL */ + const char *desc_categorized; + + /* Human-readable core option information + * > Used as menu sublabel */ + const char *info; + + /* Human-readable core option information + * > Used as menu sublabel when frontend + * has core option category support + * (e.g. may be required when info text + * references an option by name/desc, + * and the desc/desc_categorized text + * for that option differ) + * > If empty or NULL, the string specified by + * info will be used as the menu sublabel + * > Will be ignored (and may be set to NULL) + * if category_key is empty or NULL */ + const char *info_categorized; + + /* Variable specifying category (e.g. "video", + * "audio") that will be assigned to the option + * if frontend has core option category support. + * > Categorized options will be displayed in a + * subsection/submenu of the frontend core + * option interface + * > Specified string must match one of the + * retro_core_option_v2_category::key values + * in the associated retro_core_option_v2_category + * array; If no match is not found, specified + * string will be considered as NULL + * > If specified string is empty or NULL, option will + * have no category and will be shown at the top + * level of the frontend core option interface */ + const char *category_key; + + /* Array of retro_core_option_value structs, terminated by NULL */ + struct retro_core_option_value values[RETRO_NUM_CORE_OPTION_VALUES_MAX]; + + /* Default core option value. Must match one of the values + * in the retro_core_option_value array, otherwise will be + * ignored */ + const char *default_value; +}; + +struct retro_core_options_v2 +{ + /* Array of retro_core_option_v2_category structs, + * terminated by NULL + * > If NULL, all entries in definitions array + * will have no category and will be shown at + * the top level of the frontend core option + * interface + * > Will be ignored if frontend does not have + * core option category support */ + struct retro_core_option_v2_category *categories; + + /* Array of retro_core_option_v2_definition structs, + * terminated by NULL */ + struct retro_core_option_v2_definition *definitions; +}; + +struct retro_core_options_v2_intl +{ + /* Pointer to a retro_core_options_v2 struct + * > US English implementation + * > Must point to a valid struct */ + struct retro_core_options_v2 *us; + + /* Pointer to a retro_core_options_v2 struct + * - Implementation for current frontend language + * - May be NULL */ + struct retro_core_options_v2 *local; +}; + +/* Used by the frontend to monitor changes in core option + * visibility. May be called each time any core option + * value is set via the frontend. + * - On each invocation, the core must update the visibility + * of any dynamically hidden options using the + * RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY environment + * callback. + * - On the first invocation, returns 'true' if the visibility + * of any core option has changed since the last call of + * retro_load_game() or retro_load_game_special(). + * - On each subsequent invocation, returns 'true' if the + * visibility of any core option has changed since the last + * time the function was called. */ +typedef bool (RETRO_CALLCONV *retro_core_options_update_display_callback_t)(void); +struct retro_core_options_update_display_callback +{ + retro_core_options_update_display_callback_t callback; +}; + +struct retro_game_info +{ + const char *path; /* Path to game, UTF-8 encoded. + * Sometimes used as a reference for building other paths. + * May be NULL if game was loaded from stdin or similar, + * but in this case some cores will be unable to load `data`. + * So, it is preferable to fabricate something here instead + * of passing NULL, which will help more cores to succeed. + * retro_system_info::need_fullpath requires + * that this path is valid. */ + const void *data; /* Memory buffer of loaded game. Will be NULL + * if need_fullpath was set. */ + size_t size; /* Size of memory buffer. */ + const char *meta; /* String of implementation specific meta-data. */ +}; + +#define RETRO_MEMORY_ACCESS_WRITE (1 << 0) + /* The core will write to the buffer provided by retro_framebuffer::data. */ +#define RETRO_MEMORY_ACCESS_READ (1 << 1) + /* The core will read from retro_framebuffer::data. */ +#define RETRO_MEMORY_TYPE_CACHED (1 << 0) + /* The memory in data is cached. + * If not cached, random writes and/or reading from the buffer is expected to be very slow. */ +struct retro_framebuffer +{ + void *data; /* The framebuffer which the core can render into. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. + The initial contents of data are unspecified. */ + unsigned width; /* The framebuffer width used by the core. Set by core. */ + unsigned height; /* The framebuffer height used by the core. Set by core. */ + size_t pitch; /* The number of bytes between the beginning of a scanline, + and beginning of the next scanline. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + enum retro_pixel_format format; /* The pixel format the core must use to render into data. + This format could differ from the format used in + SET_PIXEL_FORMAT. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ + + unsigned access_flags; /* How the core will access the memory in the framebuffer. + RETRO_MEMORY_ACCESS_* flags. + Set by core. */ + unsigned memory_flags; /* Flags telling core how the memory has been mapped. + RETRO_MEMORY_TYPE_* flags. + Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */ +}; + +/* Used by a libretro core to override the current + * fastforwarding mode of the frontend */ +struct retro_fastforwarding_override +{ + /* Specifies the runtime speed multiplier that + * will be applied when 'fastforward' is true. + * For example, a value of 5.0 when running 60 FPS + * content will cap the fast-forward rate at 300 FPS. + * Note that the target multiplier may not be achieved + * if the host hardware has insufficient processing + * power. + * Setting a value of 0.0 (or greater than 0.0 but + * less than 1.0) will result in an uncapped + * fast-forward rate (limited only by hardware + * capacity). + * If the value is negative, it will be ignored + * (i.e. the frontend will use a runtime speed + * multiplier of its own choosing) */ + float ratio; + + /* If true, fastforwarding mode will be enabled. + * If false, fastforwarding mode will be disabled. */ + bool fastforward; + + /* If true, and if supported by the frontend, an + * on-screen notification will be displayed while + * 'fastforward' is true. + * If false, and if supported by the frontend, any + * on-screen fast-forward notifications will be + * suppressed */ + bool notification; + + /* If true, the core will have sole control over + * when fastforwarding mode is enabled/disabled; + * the frontend will not be able to change the + * state set by 'fastforward' until either + * 'inhibit_toggle' is set to false, or the core + * is unloaded */ + bool inhibit_toggle; +}; + +/* During normal operation. Rate will be equal to the core's internal FPS. */ +#define RETRO_THROTTLE_NONE 0 + +/* While paused or stepping single frames. Rate will be 0. */ +#define RETRO_THROTTLE_FRAME_STEPPING 1 + +/* During fast forwarding. + * Rate will be 0 if not specifically limited to a maximum speed. */ +#define RETRO_THROTTLE_FAST_FORWARD 2 + +/* During slow motion. Rate will be less than the core's internal FPS. */ +#define RETRO_THROTTLE_SLOW_MOTION 3 + +/* While rewinding recorded save states. Rate can vary depending on the rewind + * speed or be 0 if the frontend is not aiming for a specific rate. */ +#define RETRO_THROTTLE_REWINDING 4 + +/* While vsync is active in the video driver and the target refresh rate is + * lower than the core's internal FPS. Rate is the target refresh rate. */ +#define RETRO_THROTTLE_VSYNC 5 + +/* When the frontend does not throttle in any way. Rate will be 0. + * An example could be if no vsync or audio output is active. */ +#define RETRO_THROTTLE_UNBLOCKED 6 + +struct retro_throttle_state +{ + /* The current throttling mode. Should be one of the values above. */ + unsigned mode; + + /* How many times per second the frontend aims to call retro_run. + * Depending on the mode, it can be 0 if there is no known fixed rate. + * This won't be accurate if the total processing time of the core and + * the frontend is longer than what is available for one frame. */ + float rate; +}; + +/** + * Opaque handle to a microphone that's been opened for use. + * The underlying object is accessed or created with \c retro_microphone_interface_t. + */ +typedef struct retro_microphone retro_microphone_t; + +/** + * Parameters for configuring a microphone. + * Some of these might not be honored, + * depending on the available hardware and driver configuration. + */ +typedef struct retro_microphone_params +{ + /** + * The desired sample rate of the microphone's input, in Hz. + * The microphone's input will be resampled, + * so cores can ask for whichever frequency they need. + * + * If zero, some reasonable default will be provided by the frontend + * (usually from its config file). + * + * @see retro_get_mic_rate_t + */ + unsigned rate; +} retro_microphone_params_t; + +/** + * @copydoc retro_microphone_interface::open_mic + */ +typedef retro_microphone_t *(RETRO_CALLCONV *retro_open_mic_t)(const retro_microphone_params_t *params); + +/** + * @copydoc retro_microphone_interface::close_mic + */ +typedef void (RETRO_CALLCONV *retro_close_mic_t)(retro_microphone_t *microphone); + +/** + * @copydoc retro_microphone_interface::get_params + */ +typedef bool (RETRO_CALLCONV *retro_get_mic_params_t)(const retro_microphone_t *microphone, retro_microphone_params_t *params); + +/** + * @copydoc retro_microphone_interface::set_mic_state + */ +typedef bool (RETRO_CALLCONV *retro_set_mic_state_t)(retro_microphone_t *microphone, bool state); + +/** + * @copydoc retro_microphone_interface::get_mic_state + */ +typedef bool (RETRO_CALLCONV *retro_get_mic_state_t)(const retro_microphone_t *microphone); + +/** + * @copydoc retro_microphone_interface::read_mic + */ +typedef int (RETRO_CALLCONV *retro_read_mic_t)(retro_microphone_t *microphone, int16_t* samples, size_t num_samples); + +/** + * The current version of the microphone interface. + * Will be incremented whenever \c retro_microphone_interface or \c retro_microphone_params_t + * receive new fields. + * + * Frontends using cores built against older mic interface versions + * should not access fields introduced in newer versions. + */ +#define RETRO_MICROPHONE_INTERFACE_VERSION 1 + +/** + * An interface for querying the microphone and accessing data read from it. + * + * @see RETRO_ENVIRONMENT_GET_MICROPHONE_INTERFACE + */ +struct retro_microphone_interface +{ + /** + * The version of this microphone interface. + * Set by the core to request a particular version, + * and set by the frontend to indicate the returned version. + * 0 indicates that the interface is invalid or uninitialized. + */ + unsigned interface_version; + + /** + * Initializes a new microphone. + * Assuming that microphone support is enabled and provided by the frontend, + * cores may call this function whenever necessary. + * A microphone could be opened throughout a core's lifetime, + * or it could wait until a microphone is plugged in to the emulated device. + * + * The returned handle will be valid until it's freed, + * even if the audio driver is reinitialized. + * + * This function is not guaranteed to be thread-safe. + * + * @param args[in] Parameters used to create the microphone. + * May be \c NULL, in which case the default value of each parameter will be used. + * + * @returns Pointer to the newly-opened microphone, + * or \c NULL if one couldn't be opened. + * This likely means that no microphone is plugged in and recognized, + * or the maximum number of supported microphones has been reached. + * + * @note Microphones are \em inactive by default; + * to begin capturing audio, call \c set_mic_state. + * @see retro_microphone_params_t + */ + retro_open_mic_t open_mic; + + /** + * Closes a microphone that was initialized with \c open_mic. + * Calling this function will stop all microphone activity + * and free up the resources that it allocated. + * Afterwards, the handle is invalid and must not be used. + * + * A frontend may close opened microphones when unloading content, + * but this behavior is not guaranteed. + * Cores should close their microphones when exiting, just to be safe. + * + * @param microphone Pointer to the microphone that was allocated by \c open_mic. + * If \c NULL, this function does nothing. + * + * @note The handle might be reused if another microphone is opened later. + */ + retro_close_mic_t close_mic; + + /** + * Returns the configured parameters of this microphone. + * These may differ from what was requested depending on + * the driver and device configuration. + * + * Cores should check these values before they start fetching samples. + * + * Will not change after the mic was opened. + * + * @param microphone[in] Opaque handle to the microphone + * whose parameters will be retrieved. + * @param params[out] The parameters object that the + * microphone's parameters will be copied to. + * + * @return \c true if the parameters were retrieved, + * \c false if there was an error. + */ + retro_get_mic_params_t get_params; + + /** + * Enables or disables the given microphone. + * Microphones are disabled by default + * and must be explicitly enabled before they can be used. + * Disabled microphones will not process incoming audio samples, + * and will therefore have minimal impact on overall performance. + * Cores may enable microphones throughout their lifetime, + * or only for periods where they're needed. + * + * Cores that accept microphone input should be able to operate without it; + * we suggest substituting silence in this case. + * + * @param microphone Opaque handle to the microphone + * whose state will be adjusted. + * This will have been provided by \c open_mic. + * @param state \c true if the microphone should receive audio input, + * \c false if it should be idle. + * @returns \c true if the microphone's state was successfully set, + * \c false if \c microphone is invalid + * or if there was an error. + */ + retro_set_mic_state_t set_mic_state; + + /** + * Queries the active state of a microphone at the given index. + * Will return whether the microphone is enabled, + * even if the driver is paused. + * + * @param microphone Opaque handle to the microphone + * whose state will be queried. + * @return \c true if the provided \c microphone is valid and active, + * \c false if not or if there was an error. + */ + retro_get_mic_state_t get_mic_state; + + /** + * Retrieves the input processed by the microphone since the last call. + * \em Must be called every frame unless \c microphone is disabled, + * similar to how \c retro_audio_sample_batch_t works. + * + * @param[in] microphone Opaque handle to the microphone + * whose recent input will be retrieved. + * @param[out] samples The buffer that will be used to store the microphone's data. + * Microphone input is in mono (i.e. one number per sample). + * Should be large enough to accommodate the expected number of samples per frame; + * for example, a 44.1kHz sample rate at 60 FPS would require space for 735 samples. + * @param[in] num_samples The size of the data buffer in samples (\em not bytes). + * Microphone input is in mono, so a "frame" and a "sample" are equivalent in length here. + * + * @return The number of samples that were copied into \c samples. + * If \c microphone is pending driver initialization, + * this function will copy silence of the requested length into \c samples. + * + * Will return -1 if the microphone is disabled, + * the audio driver is paused, + * or there was an error. + */ + retro_read_mic_t read_mic; +}; + +/** + * Describes how a device is being powered. + * @see RETRO_ENVIRONMENT_GET_DEVICE_POWER + */ +enum retro_power_state +{ + /** + * Indicates that the frontend cannot report its power state at this time, + * most likely due to a lack of support. + * + * \c RETRO_ENVIRONMENT_GET_DEVICE_POWER will not return this value; + * instead, the environment callback will return \c false. + */ + RETRO_POWERSTATE_UNKNOWN = 0, + + /** + * Indicates that the device is running on its battery. + * Usually applies to portable devices such as handhelds, laptops, and smartphones. + */ + RETRO_POWERSTATE_DISCHARGING, + + /** + * Indicates that the device's battery is currently charging. + */ + RETRO_POWERSTATE_CHARGING, + + /** + * Indicates that the device is connected to a power source + * and that its battery has finished charging. + */ + RETRO_POWERSTATE_CHARGED, + + /** + * Indicates that the device is connected to a power source + * and that it does not have a battery. + * This usually suggests a desktop computer or a non-portable game console. + */ + RETRO_POWERSTATE_PLUGGED_IN +}; + +/** + * Indicates that an estimate is not available for the battery level or time remaining, + * even if the actual power state is known. + */ +#define RETRO_POWERSTATE_NO_ESTIMATE (-1) + +/** + * Describes the power state of the device running the frontend. + * @see RETRO_ENVIRONMENT_GET_DEVICE_POWER + */ +struct retro_device_power +{ + /** + * The current state of the frontend's power usage. + */ + enum retro_power_state state; + + /** + * A rough estimate of the amount of time remaining (in seconds) + * before the device powers off. + * This value depends on a variety of factors, + * so it is not guaranteed to be accurate. + * + * Will be set to \c RETRO_POWERSTATE_NO_ESTIMATE if \c state does not equal \c RETRO_POWERSTATE_DISCHARGING. + * May still be set to \c RETRO_POWERSTATE_NO_ESTIMATE if the frontend is unable to provide an estimate. + */ + int seconds; + + /** + * The approximate percentage of battery charge, + * ranging from 0 to 100 (inclusive). + * The device may power off before this reaches 0. + * + * The user might have configured their device + * to stop charging before the battery is full, + * so do not assume that this will be 100 in the \c RETRO_POWERSTATE_CHARGED state. + */ + int8_t percent; +}; + +/* Callbacks */ + +/* Environment callback. Gives implementations a way of performing + * uncommon tasks. Extensible. */ +typedef bool (RETRO_CALLCONV *retro_environment_t)(unsigned cmd, void *data); + +/* Render a frame. Pixel format is 15-bit 0RGB1555 native endian + * unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). + * + * Width and height specify dimensions of buffer. + * Pitch specifices length in bytes between two lines in buffer. + * + * For performance reasons, it is highly recommended to have a frame + * that is packed in memory, i.e. pitch == width * byte_per_pixel. + * Certain graphic APIs, such as OpenGL ES, do not like textures + * that are not packed in memory. + */ +typedef void (RETRO_CALLCONV *retro_video_refresh_t)(const void *data, unsigned width, + unsigned height, size_t pitch); + +/* Renders a single audio frame. Should only be used if implementation + * generates a single sample at a time. + * Format is signed 16-bit native endian. + */ +typedef void (RETRO_CALLCONV *retro_audio_sample_t)(int16_t left, int16_t right); + +/* Renders multiple audio frames in one go. + * + * One frame is defined as a sample of left and right channels, interleaved. + * I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. + * Only one of the audio callbacks must ever be used. + */ +typedef size_t (RETRO_CALLCONV *retro_audio_sample_batch_t)(const int16_t *data, + size_t frames); + +/* Polls input. */ +typedef void (RETRO_CALLCONV *retro_input_poll_t)(void); + +/* Queries for input for player 'port'. device will be masked with + * RETRO_DEVICE_MASK. + * + * Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that + * have been set with retro_set_controller_port_device() + * will still use the higher level RETRO_DEVICE_JOYPAD to request input. + */ +typedef int16_t (RETRO_CALLCONV *retro_input_state_t)(unsigned port, unsigned device, + unsigned index, unsigned id); + +/* Sets callbacks. retro_set_environment() is guaranteed to be called + * before retro_init(). + * + * The rest of the set_* functions are guaranteed to have been called + * before the first call to retro_run() is made. */ +RETRO_API void retro_set_environment(retro_environment_t); +RETRO_API void retro_set_video_refresh(retro_video_refresh_t); +RETRO_API void retro_set_audio_sample(retro_audio_sample_t); +RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t); +RETRO_API void retro_set_input_poll(retro_input_poll_t); +RETRO_API void retro_set_input_state(retro_input_state_t); + +/* Library global initialization/deinitialization. */ +RETRO_API void retro_init(void); +RETRO_API void retro_deinit(void); + +/* Must return RETRO_API_VERSION. Used to validate ABI compatibility + * when the API is revised. */ +RETRO_API unsigned retro_api_version(void); + +/* Gets statically known system info. Pointers provided in *info + * must be statically allocated. + * Can be called at any time, even before retro_init(). */ +RETRO_API void retro_get_system_info(struct retro_system_info *info); + +/* Gets information about system audio/video timings and geometry. + * Can be called only after retro_load_game() has successfully completed. + * NOTE: The implementation of this function might not initialize every + * variable if needed. + * E.g. geom.aspect_ratio might not be initialized if core doesn't + * desire a particular aspect ratio. */ +RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info); + +/* Sets device to be used for player 'port'. + * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all + * available ports. + * Setting a particular device type is not a guarantee that libretro cores + * will only poll input based on that particular device type. It is only a + * hint to the libretro core when a core cannot automatically detect the + * appropriate input device type on its own. It is also relevant when a + * core can change its behavior depending on device type. + * + * As part of the core's implementation of retro_set_controller_port_device, + * the core should call RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS to notify the + * frontend if the descriptions for any controls have changed as a + * result of changing the device type. + */ +RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device); + +/* Resets the current game. */ +RETRO_API void retro_reset(void); + +/* Runs the game for one video frame. + * During retro_run(), input_poll callback must be called at least once. + * + * If a frame is not rendered for reasons where a game "dropped" a frame, + * this still counts as a frame, and retro_run() should explicitly dupe + * a frame if GET_CAN_DUPE returns true. + * In this case, the video callback can take a NULL argument for data. + */ +RETRO_API void retro_run(void); + +/* Returns the amount of data the implementation requires to serialize + * internal state (save states). + * Between calls to retro_load_game() and retro_unload_game(), the + * returned size is never allowed to be larger than a previous returned + * value, to ensure that the frontend can allocate a save state buffer once. + */ +RETRO_API size_t retro_serialize_size(void); + +/* Serializes internal state. If failed, or size is lower than + * retro_serialize_size(), it should return false, true otherwise. */ +RETRO_API bool retro_serialize(void *data, size_t size); +RETRO_API bool retro_unserialize(const void *data, size_t size); + +RETRO_API void retro_cheat_reset(void); +RETRO_API void retro_cheat_set(unsigned index, bool enabled, const char *code); + +/* Loads a game. + * Return true to indicate successful loading and false to indicate load failure. + */ +RETRO_API bool retro_load_game(const struct retro_game_info *game); + +/* Loads a "special" kind of game. Should not be used, + * except in extreme cases. */ +RETRO_API bool retro_load_game_special( + unsigned game_type, + const struct retro_game_info *info, size_t num_info +); + +/* Unloads the currently loaded game. Called before retro_deinit(void). */ +RETRO_API void retro_unload_game(void); + +/* Gets region of game. */ +RETRO_API unsigned retro_get_region(void); + +/* Gets region of memory. */ +RETRO_API void *retro_get_memory_data(unsigned id); +RETRO_API size_t retro_get_memory_size(unsigned id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/metal-cpp b/third_party/metal-cpp new file mode 160000 index 00000000..a63bd172 --- /dev/null +++ b/third_party/metal-cpp @@ -0,0 +1 @@ +Subproject commit a63bd172ddcba73a3d87ca32032b66ad41ddb9a6 diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index f368f573..607815fa 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -355,111 +355,152 @@ namespace OpenGL { } }; - enum ShaderType { - Fragment = GL_FRAGMENT_SHADER, - Vertex = GL_VERTEX_SHADER, - Geometry = GL_GEOMETRY_SHADER, - Compute = GL_COMPUTE_SHADER, - TessControl = GL_TESS_CONTROL_SHADER, - TessEvaluation = GL_TESS_EVALUATION_SHADER - }; + enum ShaderType { + Fragment = GL_FRAGMENT_SHADER, + Vertex = GL_VERTEX_SHADER, + Geometry = GL_GEOMETRY_SHADER, + Compute = GL_COMPUTE_SHADER, + TessControl = GL_TESS_CONTROL_SHADER, + TessEvaluation = GL_TESS_EVALUATION_SHADER + }; - struct Shader { - GLuint m_handle = 0; + struct Shader { + GLuint m_handle = 0; - Shader() {} - Shader(const std::string_view source, ShaderType type) { create(source, static_cast(type)); } + Shader() {} + Shader(const std::string_view source, ShaderType type) { create(source, static_cast(type)); } - // Returns whether compilation failed or not - bool create(const std::string_view source, GLenum type) { - m_handle = glCreateShader(type); - const GLchar* const sources[1] = { source.data() }; + // Returns whether compilation failed or not + bool create(const std::string_view source, GLenum type) { + m_handle = glCreateShader(type); + const GLchar* const sources[1] = {source.data()}; - glShaderSource(m_handle, 1, sources, nullptr); - glCompileShader(m_handle); + glShaderSource(m_handle, 1, sources, nullptr); + glCompileShader(m_handle); - GLint success; - glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); - if (success == GL_FALSE) { - char buf[4096]; - glGetShaderInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); - glDeleteShader(m_handle); + GLint success; + glGetShaderiv(m_handle, GL_COMPILE_STATUS, &success); + if (success == GL_FALSE) { + char buf[4096]; + glGetShaderInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to compile shader\nError: %s\n", buf); + glDeleteShader(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + + void free() { + if (exists()) { + glDeleteShader(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Shader() { free(); } +#endif + }; struct Program { - GLuint m_handle = 0; + GLuint m_handle = 0; - bool create(std::initializer_list> shaders) { - m_handle = glCreateProgram(); - for (const auto& shader : shaders) { - glAttachShader(m_handle, shader.get().handle()); - } + bool create(std::initializer_list> shaders) { + m_handle = glCreateProgram(); + for (const auto& shader : shaders) { + glAttachShader(m_handle, shader.get().handle()); + } - glLinkProgram(m_handle); - GLint success; - glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + glLinkProgram(m_handle); + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); - if (!success) { - char buf[4096]; - glGetProgramInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to link program\nError: %s\n", buf); - glDeleteProgram(m_handle); + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void use() const { glUseProgram(m_handle); } - }; + bool createFromBinary(const uint8_t* binary, size_t size, GLenum format) { + m_handle = glCreateProgram(); + glProgramBinary(m_handle, format, binary, size); + + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); + + m_handle = 0; + } + + return m_handle != 0; + } + + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } + + void free() { + if (exists()) { + glDeleteProgram(m_handle); + m_handle = 0; + } + } + +#ifdef OPENGL_DESTRUCTORS + ~Program() { free(); } +#endif + }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { glDispatchCompute(groupsX, groupsY, groupsZ); } - struct VertexBuffer { - GLuint m_handle = 0; + struct VertexBuffer { + GLuint m_handle = 0; - void create() { - if (m_handle == 0) { - glGenBuffers(1, &m_handle); - } - } + void create() { + if (m_handle == 0) { + glGenBuffers(1, &m_handle); + } + } - void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { - create(); - bind(); - glBufferData(GL_ARRAY_BUFFER, size, nullptr, usage); - } + void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { + create(); + bind(); + glBufferData(GL_ARRAY_BUFFER, size, nullptr, usage); + } - VertexBuffer(bool shouldCreate = false) { - if (shouldCreate) { - create(); - } - } + VertexBuffer(bool shouldCreate = false) { + if (shouldCreate) { + create(); + } + } #ifdef OPENGL_DESTRUCTORS - ~VertexBuffer() { free(); } -#endif - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } - void free() { glDeleteBuffers(1, &m_handle); } + ~VertexBuffer() { free(); } +#endif + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } + void free() { glDeleteBuffers(1, &m_handle); } - // Reallocates the buffer on every call. Prefer the sub version if possible. + // Reallocates the buffer on every call. Prefer the sub version if possible. template void bufferVerts(VertType* vertices, int vertCount, GLenum usage = GL_DYNAMIC_DRAW) { glBufferData(GL_ARRAY_BUFFER, sizeof(VertType) * vertCount, vertices, usage); @@ -471,7 +512,7 @@ namespace OpenGL { glBufferSubData(GL_ARRAY_BUFFER, offset, sizeof(VertType) * vertCount, vertices); } - // If C++20 is available, add overloads that take std::span instead of raw pointers + // If C++20 is available, add overloads that take std::span instead of raw pointers #ifdef OPENGL_HAVE_CPP20 template void bufferVerts(std::span vertices, GLenum usage = GL_DYNAMIC_DRAW) { @@ -485,6 +526,48 @@ namespace OpenGL { #endif }; + struct UniformBuffer { + GLuint m_handle = 0; + + void create() { + if (m_handle == 0) { + glGenBuffers(1, &m_handle); + } + } + + void createFixedSize(GLsizei size, GLenum usage = GL_DYNAMIC_DRAW) { + create(); + bind(); + glBufferData(GL_UNIFORM_BUFFER, size, nullptr, usage); + } + + UniformBuffer(bool shouldCreate = false) { + if (shouldCreate) { + create(); + } + } + +#ifdef OPENGL_DESTRUCTORS + ~UniformBuffer() { free(); } +#endif + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void bind() const { glBindBuffer(GL_UNIFORM_BUFFER, m_handle); } + void free() { glDeleteBuffers(1, &m_handle); } + + // Reallocates the buffer on every call. Prefer the sub version if possible. + template + void buffer(const UniformType& uniformData, GLenum usage = GL_DYNAMIC_DRAW) { + glBufferData(GL_UNIFORM_BUFFER, sizeof(uniformData), &uniformData, usage); + } + + // Only use if you used createFixedSize + template + void bufferSub(const UniformType& uniformData, int vertCount, GLintptr offset = 0) { + glBufferSubData(GL_UNIFORM_BUFFER, offset, sizeof(uniformData), &uniformData); + } + }; + enum DepthFunc { Never = GL_NEVER, // Depth test never passes Always = GL_ALWAYS, // Depth test always passes @@ -693,4 +776,4 @@ namespace OpenGL { using Rect = Rectangle; -} // end namespace OpenGL \ No newline at end of file +} // end namespace OpenGL diff --git a/third_party/renderdoc/renderdoc_app.h b/third_party/renderdoc/renderdoc_app.h new file mode 100644 index 00000000..e73f1c90 --- /dev/null +++ b/third_party/renderdoc/renderdoc_app.h @@ -0,0 +1,721 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) || defined(__FreeBSD__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_value +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff } + +// truncated version when only a uint64_t is available (e.g. Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption { + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. + eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. + // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. + // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. + eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. + // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. +// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton { + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, + + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + +typedef enum RENDERDOC_OverlayBits { + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = + (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = ~0U, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. +typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. +// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFileTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. +// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to. If set to NULL or "" +// the most recent capture file created made will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a captures to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. +#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/oror windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. +// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discard any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version { + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. This is the first API version in the v1.0 +// branch. +// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. +// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() + +typedef struct RENDERDOC_API_1_6_0 { + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. + // These unions allow old code to continue compiling without changes + union { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. + // These unions allow old code to continue compiling without changes + union { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. + // This union allows old code to continue compiling without changes + union { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. +// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif \ No newline at end of file