Merge remote-tracking branch 'upstream/master' into memory_rework
293
.github/gles.patch
vendored
|
@ -1,52 +1,3 @@
|
|||
diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp
|
||||
index a11a6ffa..77486a09 100644
|
||||
--- a/src/core/renderer_gl/renderer_gl.cpp
|
||||
+++ b/src/core/renderer_gl/renderer_gl.cpp
|
||||
@@ -357,27 +357,27 @@ void RendererGL::bindTexturesToSlots() {
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 3);
|
||||
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void RendererGL::updateLightingLUT() {
|
||||
- gpu.lightingLUTDirty = false;
|
||||
- std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
-
|
||||
- for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
- uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
- u16_lightinglut[i] = value * 65535 / 4095;
|
||||
- }
|
||||
-
|
||||
- glActiveTexture(GL_TEXTURE0 + 3);
|
||||
- glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
- glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
- glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
- glActiveTexture(GL_TEXTURE0);
|
||||
+ // gpu.lightingLUTDirty = false;
|
||||
+ // std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
+
|
||||
+ // for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
+ // uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
+ // u16_lightinglut[i] = value * 65535 / 4095;
|
||||
+ // }
|
||||
+
|
||||
+ // glActiveTexture(GL_TEXTURE0 + 3);
|
||||
+ // glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
|
||||
+ // glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
+ // glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
+ // glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
|
||||
diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag
|
||||
index 612671c8..1937f711 100644
|
||||
--- a/src/host_shaders/opengl_display.frag
|
||||
|
@ -70,7 +21,7 @@ index 990e2f80..2e7842ac 100644
|
|||
|
||||
void main() {
|
||||
diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag
|
||||
index f6fa6c55..bb88e278 100644
|
||||
index 9f07df0b..96a35afa 100644
|
||||
--- a/src/host_shaders/opengl_fragment_shader.frag
|
||||
+++ b/src/host_shaders/opengl_fragment_shader.frag
|
||||
@@ -1,4 +1,5 @@
|
||||
|
@ -78,36 +29,29 @@ index f6fa6c55..bb88e278 100644
|
|||
+#version 300 es
|
||||
+precision mediump float;
|
||||
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
@@ -27,7 +28,7 @@ uniform bool u_depthmapEnable;
|
||||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
-uniform sampler1DArray u_tex_lighting_lut;
|
||||
+// uniform sampler1DArray u_tex_lighting_lut;
|
||||
in vec4 v_quaternion;
|
||||
in vec4 v_colour;
|
||||
@@ -41,8 +42,8 @@ vec3 normal;
|
||||
const uint samplerEnabledBitfields[2] = uint[2](0x7170e645u, 0x7f013fefu);
|
||||
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
bool isSamplerEnabled(uint environment_id, uint lut_id) {
|
||||
- uint index = 7 * environment_id + lut_id;
|
||||
- uint arrayIndex = (index >> 5);
|
||||
+ uint index = 7u * environment_id + lut_id;
|
||||
+ uint arrayIndex = (index >> 5u);
|
||||
return (samplerEnabledBitfields[arrayIndex] & (1u << (index & 31u))) != 0u;
|
||||
}
|
||||
|
||||
@@ -145,16 +146,23 @@ vec4 tevCalculateCombiner(int tev_id) {
|
||||
#define RR_LUT 6u
|
||||
@@ -166,11 +167,17 @@ float lutLookup(uint lut, int index) {
|
||||
return texelFetch(u_tex_luts, ivec2(index, int(lut)), 0).r;
|
||||
}
|
||||
|
||||
float lutLookup(uint lut, uint light, float value) {
|
||||
- if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
|
||||
- if (lut == SP_LUT) lut = light + 8;
|
||||
- return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
+ // if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1;
|
||||
+ // if (lut == SP_LUT) lut = light + 8;
|
||||
+ // return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
+ return 0.0;
|
||||
+}
|
||||
+
|
||||
+// some gles versions have bitfieldExtract and complain if you redefine it, some don't and compile error, using this instead
|
||||
+// some gles versions have bitfieldExtractCompat and complain if you redefine it, some don't and compile error, using this instead
|
||||
+uint bitfieldExtractCompat(uint val, int off, int size) {
|
||||
+ uint mask = uint((1 << size) - 1);
|
||||
+ return uint(val >> off) & mask;
|
||||
}
|
||||
|
||||
+}
|
||||
+
|
||||
vec3 regToColor(uint reg) {
|
||||
// Normalization scale to convert from [0...255] to [0.0...1.0]
|
||||
const float scale = 1.0 / 255.0;
|
||||
|
@ -117,89 +61,115 @@ index f6fa6c55..bb88e278 100644
|
|||
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
@@ -189,7 +197,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
vec3 view = normalize(v_view);
|
||||
@@ -201,7 +208,7 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
// These are the spotlight attenuation LUTs
|
||||
bit_in_config1 = 8 + int(light_id & 7u);
|
||||
lut_index = 8u + light_id;
|
||||
- } else if (lut_id <= 6) {
|
||||
+ } else if (lut_id <= 6u) {
|
||||
bit_in_config1 = 16 + int(lut_id);
|
||||
lut_index = lut_id;
|
||||
} else {
|
||||
@@ -210,16 +217,16 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
|
||||
bool current_sampler_enabled = isSamplerEnabled(environment_id, lut_id); // 7 luts per environment
|
||||
|
||||
- if (!current_sampler_enabled || (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
|
||||
+ if (!current_sampler_enabled || (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, bit_in_config1, 1) != 0u)) {
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
|
||||
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, int(lut_id) << 2, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u) scale /= 256.0;
|
||||
|
||||
float delta = 1.0;
|
||||
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
|
||||
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, int(lut_id) << 2, 3);
|
||||
switch (input_id) {
|
||||
case 0u: {
|
||||
delta = dot(normal, normalize(half_vector));
|
||||
@@ -243,9 +250,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
|
||||
// Sign extend them. Normally bitfieldExtract would do that but it's missing on some versions
|
||||
// of GLSL so we do it manually
|
||||
- int se_x = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 13);
|
||||
- int se_y = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 13);
|
||||
- int se_z = bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 13);
|
||||
+ int se_x = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 0, 13));
|
||||
+ int se_y = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_LOW), 16, 13));
|
||||
+ int se_z = int(bitfieldExtractCompat(uint(GPUREG_LIGHTi_SPOTDIR_HIGH), 0, 13));
|
||||
|
||||
if ((se_x & 0x1000) == 0x1000) se_x |= 0xffffe000;
|
||||
if ((se_y & 0x1000) == 0x1000) se_y |= 0xffffe000;
|
||||
@@ -272,9 +279,9 @@ float lightLutLookup(uint environment_id, uint lut_id, uint light_id, vec3 light
|
||||
}
|
||||
|
||||
// 0 = enabled
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 1 + (int(lut_id) << 2), 1) == 0u) {
|
||||
// Two sided diffuse
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u) {
|
||||
delta = max(delta, 0.0);
|
||||
} else {
|
||||
delta = abs(delta);
|
||||
@@ -298,7 +305,7 @@ vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
|
||||
// Implements the following algorthm: https://mathb.in/26766
|
||||
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008Fu);
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_ENABLE, 0, 1) == 0u) {
|
||||
primary_color = secondary_color = vec4(1.0);
|
||||
primary_color = secondary_color = vec4(0.0);
|
||||
return;
|
||||
}
|
||||
@@ -213,7 +221,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
bool error_unimpl = false;
|
||||
@@ -315,7 +322,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0u);
|
||||
GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1u);
|
||||
|
||||
- uint bump_mode = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 28, 2);
|
||||
+ uint bump_mode = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 28, 2);
|
||||
|
||||
// Bump mode is ignored for now because it breaks some games ie. Toad Treasure Tracker
|
||||
switch (bump_mode) {
|
||||
@@ -328,15 +335,15 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
|
||||
- uint environment_id = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 4, 4);
|
||||
- bool clamp_highlights = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
|
||||
+ uint environment_id = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 4, 4);
|
||||
+ bool clamp_highlights = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 27, 1) == 1u;
|
||||
|
||||
uint light_id;
|
||||
vec3 light_vector;
|
||||
vec3 half_vector;
|
||||
|
||||
for (uint i = 0u; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
|
||||
- uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
+ uint light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3u), 3);
|
||||
- light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
|
||||
+ light_id = bitfieldExtractCompat(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i) << 2, 3);
|
||||
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + 0x10u * light_id);
|
||||
@@ -224,14 +232,14 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140u + (light_id << 4u));
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141u + (light_id << 4u));
|
||||
@@ -348,12 +355,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
|
||||
vec3 light_vector = normalize(vec3(
|
||||
float light_distance;
|
||||
vec3 light_position = vec3(
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5u, 10u), decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5u, 10u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5u, 10u)
|
||||
));
|
||||
|
||||
vec3 half_vector;
|
||||
);
|
||||
|
||||
// Positional Light
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 0, 1) == 0u) {
|
||||
// error_unimpl = true;
|
||||
half_vector = normalize(normalize(light_vector + v_view) + view);
|
||||
}
|
||||
@@ -242,12 +250,12 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
light_vector = light_position + v_view;
|
||||
}
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
- uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0u) {
|
||||
+ uint scale_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u) scale /= 256.0;
|
||||
|
||||
- uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
+ uint input_id = bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
if (input_id == 0u)
|
||||
d[c] = dot(normal, half_vector);
|
||||
else if (input_id == 1u)
|
||||
@@ -260,9 +268,9 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146u + 0x10u * light_id);
|
||||
uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147u + 0x10u * light_id);
|
||||
vec3 spot_light_vector = normalize(vec3(
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
|
||||
- decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1u, 11u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1u, 11u),
|
||||
+ decodeFP(bitfieldExtractCompat(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1u, 11u)
|
||||
));
|
||||
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
|
||||
} else if (input_id == 5u) {
|
||||
@@ -273,13 +281,13 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
}
|
||||
|
||||
d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]);
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
- uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4);
|
||||
+ uint lookup_config = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 4, 4);
|
||||
if (lookup_config == 0u) {
|
||||
d[D1_LUT] = 0.0;
|
||||
d[FR_LUT] = 0.0;
|
||||
@@ -310,7 +318,7 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
float NdotL = dot(normal, light_vector); // Li dot N
|
||||
@@ -369,23 +376,23 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
float NdotL = dot(normal, light_vector); // N dot Li
|
||||
|
||||
// Two sided diffuse
|
||||
- if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0u)
|
||||
|
@ -207,19 +177,40 @@ index f6fa6c55..bb88e278 100644
|
|||
NdotL = max(0.0, NdotL);
|
||||
else
|
||||
NdotL = abs(NdotL);
|
||||
@@ -321,8 +329,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
|
||||
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]));
|
||||
|
||||
float geometric_factor;
|
||||
- bool use_geo_0 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
|
||||
- bool use_geo_1 = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
|
||||
+ bool use_geo_0 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 2, 1) == 1u;
|
||||
+ bool use_geo_1 = bitfieldExtractCompat(GPUREG_LIGHTi_CONFIG, 3, 1) == 1u;
|
||||
if (use_geo_0 || use_geo_1) {
|
||||
geometric_factor = dot(half_vector, half_vector);
|
||||
geometric_factor = geometric_factor == 0.0 ? 0.0 : min(NdotL / geometric_factor, 1.0);
|
||||
}
|
||||
|
||||
float distance_attenuation = 1.0;
|
||||
- if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
|
||||
- uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtract(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
|
||||
- uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtract(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
|
||||
+ if (bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG1, 24 + int(light_id), 1) == 0u) {
|
||||
+ uint GPUREG_LIGHTi_ATTENUATION_BIAS = bitfieldExtractCompat(readPicaReg(0x014Au + (light_id << 4u)), 0, 20);
|
||||
+ uint GPUREG_LIGHTi_ATTENUATION_SCALE = bitfieldExtractCompat(readPicaReg(0x014Bu + (light_id << 4u)), 0, 20);
|
||||
|
||||
float distance_attenuation_bias = decodeFP(GPUREG_LIGHTi_ATTENUATION_BIAS, 7u, 12u);
|
||||
float distance_attenuation_scale = decodeFP(GPUREG_LIGHTi_ATTENUATION_SCALE, 7u, 12u);
|
||||
@@ -430,8 +437,8 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
||||
specular_sum.rgb += light_factor * clamp_factor * (specular0 + specular1);
|
||||
}
|
||||
|
||||
- uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
- uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
+ uint fresnel_output1 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
+ uint fresnel_output2 = bitfieldExtractCompat(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
|
||||
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
|
||||
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
|
||||
// Uses parameters from the last light as Fresnel is only applied to the last light
|
||||
float fresnel_factor;
|
||||
|
||||
diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert
|
||||
index a25d7a6d..7cf40398 100644
|
||||
index 057f9a88..dc735ced 100644
|
||||
--- a/src/host_shaders/opengl_vertex_shader.vert
|
||||
+++ b/src/host_shaders/opengl_vertex_shader.vert
|
||||
@@ -1,4 +1,6 @@
|
||||
|
@ -230,7 +221,7 @@ index a25d7a6d..7cf40398 100644
|
|||
|
||||
layout(location = 0) in vec4 a_coords;
|
||||
layout(location = 1) in vec4 a_quaternion;
|
||||
@@ -20,7 +22,7 @@ out vec2 v_texcoord2;
|
||||
@@ -18,7 +20,7 @@ out vec2 v_texcoord2;
|
||||
flat out vec4 v_textureEnvColor[6];
|
||||
flat out vec4 v_textureEnvBufferColor;
|
||||
|
||||
|
@ -239,7 +230,7 @@ index a25d7a6d..7cf40398 100644
|
|||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvColor[6];
|
||||
@@ -93,6 +95,6 @@ void main() {
|
||||
@@ -81,8 +83,8 @@ void main() {
|
||||
);
|
||||
|
||||
// There's also another, always-on clipping plane based on vertex z
|
||||
|
@ -247,16 +238,20 @@ index a25d7a6d..7cf40398 100644
|
|||
- gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
+ // gl_ClipDistance[0] = -a_coords.z;
|
||||
+ // gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
|
||||
v_quaternion = a_quaternion;
|
||||
}
|
||||
diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp
|
||||
index f368f573..5ead7f63 100644
|
||||
index 607815fa..cbfcc096 100644
|
||||
--- a/third_party/opengl/opengl.hpp
|
||||
+++ b/third_party/opengl/opengl.hpp
|
||||
@@ -520,21 +520,21 @@ namespace OpenGL {
|
||||
@@ -602,22 +602,22 @@ namespace OpenGL {
|
||||
static void disableScissor() { glDisable(GL_SCISSOR_TEST); }
|
||||
static void enableBlend() { glEnable(GL_BLEND); }
|
||||
static void disableBlend() { glDisable(GL_BLEND); }
|
||||
static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
|
||||
- static void enableLogicOp() { glEnable(GL_COLOR_LOGIC_OP); }
|
||||
- static void disableLogicOp() { glDisable(GL_COLOR_LOGIC_OP); }
|
||||
+ static void enableLogicOp() { /* glEnable(GL_COLOR_LOGIC_OP); */ }
|
||||
+ static void disableLogicOp() { /* glDisable(GL_COLOR_LOGIC_OP); */ }
|
||||
static void enableDepth() { glEnable(GL_DEPTH_TEST); }
|
||||
static void disableDepth() { glDisable(GL_DEPTH_TEST); }
|
||||
|
|
20
.github/mac-bundle-qt.sh
vendored
|
@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec"
|
|||
|
||||
# Construct the app iconset.
|
||||
mkdir alber.iconset
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
|
||||
iconutil --convert icns alber.iconset
|
||||
|
||||
# Set up the .app directory
|
||||
|
|
20
.github/mac-bundle.sh
vendored
|
@ -5,16 +5,16 @@ PATH="$PATH:/usr/libexec"
|
|||
|
||||
# Construct the app iconset.
|
||||
mkdir alber.iconset
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
|
||||
convert docs/img/alber-icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 16x16 alber.iconset/icon_16x16.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 32x32 alber.iconset/icon_16x16@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 32x32 alber.iconset/icon_32x32.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 64x64 alber.iconset/icon_32x32@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 128x128 alber.iconset/icon_128x128.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 256x256 alber.iconset/icon_128x128@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 256x256 alber.iconset/icon_256x256.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 512x512 alber.iconset/icon_256x256@2x.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 72 -resize 512x512 alber.iconset/icon_512x512.png
|
||||
convert docs/img/mac_icon.ico -alpha on -background none -units PixelsPerInch -density 144 -resize 1024x1024 alber.iconset/icon_512x512@2x.png
|
||||
iconutil --convert icns alber.iconset
|
||||
|
||||
# Set up the .app directory
|
||||
|
|
4
.github/workflows/Android_Build.yml
vendored
|
@ -8,7 +8,7 @@ on:
|
|||
|
||||
jobs:
|
||||
x64:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
|
@ -73,7 +73,7 @@ jobs:
|
|||
./src/pandroid/app/build/outputs/apk/${{ env.BUILD_TYPE }}/app-${{ env.BUILD_TYPE }}.apk
|
||||
|
||||
arm64:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
|
|
4
.github/workflows/HTTP_Build.yml
vendored
|
@ -16,10 +16,10 @@ jobs:
|
|||
# well on Windows or Mac. You can convert this to a matrix build if you need
|
||||
# cross-platform coverage.
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
|
81
.github/workflows/Hydra_Build.yml
vendored
|
@ -15,7 +15,7 @@ jobs:
|
|||
runs-on: windows-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -32,18 +32,33 @@ jobs:
|
|||
- name: Build
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload core
|
||||
uses: actions/upload-artifact@v2
|
||||
- name: Upload Hydra core
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Windows core
|
||||
name: Windows Hydra core
|
||||
path: '${{github.workspace}}/build/${{ env.BUILD_TYPE }}/Alber.dll'
|
||||
|
||||
- name: Configure CMake (Again)
|
||||
run: |
|
||||
rm -r -fo ${{github.workspace}}/build
|
||||
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
|
||||
|
||||
- name: Build (Again)
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload Libretro core
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Windows Libretro core
|
||||
path: |
|
||||
${{github.workspace}}/build/${{ env.BUILD_TYPE }}/panda3ds_libretro.dll
|
||||
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
|
||||
|
||||
MacOS:
|
||||
runs-on: macos-13
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -61,22 +76,38 @@ jobs:
|
|||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload core
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: MacOS core
|
||||
name: MacOS Hydra core
|
||||
path: '${{github.workspace}}/build/libAlber.dylib'
|
||||
|
||||
- name: Configure CMake (Again)
|
||||
run: |
|
||||
rm -rf ${{github.workspace}}/build
|
||||
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
|
||||
|
||||
- name: Build (Again)
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} && ls -R ${{github.workspace}}/build
|
||||
|
||||
- name: Upload Libretro core
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: MacOS Libretro core
|
||||
path: |
|
||||
${{github.workspace}}/build/panda3ds_libretro.dylib
|
||||
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
|
||||
|
||||
Linux:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
- name: Install misc packages
|
||||
run: |
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
|
||||
|
||||
- name: Install newer Clang
|
||||
run: |
|
||||
|
@ -98,22 +129,38 @@ jobs:
|
|||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload core
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Linux core
|
||||
name: Linux Hydra core
|
||||
path: '${{github.workspace}}/build/libAlber.so'
|
||||
|
||||
- name: Configure CMake (Again)
|
||||
run: |
|
||||
rm -rf ${{github.workspace}}/build
|
||||
cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DBUILD_LIBRETRO_CORE=ON
|
||||
|
||||
- name: Build (Again)
|
||||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload Libretro core
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Linux Libretro core
|
||||
path: |
|
||||
${{github.workspace}}/build/panda3ds_libretro.so
|
||||
${{github.workspace}}/docs/libretro/panda3ds_libretro.info
|
||||
|
||||
Android-x64:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
- name: Install misc packages
|
||||
run: |
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
|
||||
|
||||
- name: Setup Vulkan SDK
|
||||
uses: humbletim/setup-vulkan-sdk@v1.2.0
|
||||
|
@ -129,7 +176,7 @@ jobs:
|
|||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload core
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Android core
|
||||
name: Android Hydra core
|
||||
path: '${{github.workspace}}/build/libAlber.so'
|
||||
|
|
18
.github/workflows/Linux_AppImage_Build.yml
vendored
|
@ -16,15 +16,15 @@ jobs:
|
|||
# well on Windows or Mac. You can convert this to a matrix build if you need
|
||||
# cross-platform coverage.
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
- name: Install misc packages
|
||||
run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2
|
||||
run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev
|
||||
|
||||
- name: Install newer Clang
|
||||
run: |
|
||||
|
@ -33,11 +33,11 @@ jobs:
|
|||
sudo ./llvm.sh 17
|
||||
|
||||
- name: Setup Vulkan SDK
|
||||
run: |
|
||||
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
||||
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
|
||||
sudo apt update
|
||||
sudo apt install vulkan-sdk
|
||||
uses: humbletim/setup-vulkan-sdk@v1.2.0
|
||||
with:
|
||||
vulkan-query-version: latest
|
||||
vulkan-use-cache: true
|
||||
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
|
||||
|
||||
- name: Configure CMake
|
||||
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
|
||||
|
@ -52,7 +52,7 @@ jobs:
|
|||
run: ./.github/linux-appimage.sh
|
||||
|
||||
- name: Upload executable
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Linux executable
|
||||
path: './Alber-x86_64.AppImage'
|
||||
|
|
8
.github/workflows/Linux_Build.yml
vendored
|
@ -16,15 +16,15 @@ jobs:
|
|||
# well on Windows or Mac. You can convert this to a matrix build if you need
|
||||
# cross-platform coverage.
|
||||
# See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
- name: Install misc packages
|
||||
run: sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev
|
||||
run: sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libwayland-dev
|
||||
|
||||
- name: Install newer Clang
|
||||
run: |
|
||||
|
@ -49,7 +49,7 @@ jobs:
|
|||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload executable
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Linux executable
|
||||
path: './build/Alber'
|
||||
|
|
4
.github/workflows/MacOS_Build.yml
vendored
|
@ -19,7 +19,7 @@ jobs:
|
|||
runs-on: macos-13
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -52,7 +52,7 @@ jobs:
|
|||
run: zip -r Alber Alber.app
|
||||
|
||||
- name: Upload MacOS App
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: MacOS Alber App Bundle
|
||||
path: 'Alber.zip'
|
||||
|
|
27
.github/workflows/Qt_Build.yml
vendored
|
@ -15,7 +15,7 @@ jobs:
|
|||
runs-on: windows-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -45,7 +45,7 @@ jobs:
|
|||
windeployqt --dir upload upload/Alber.exe
|
||||
|
||||
- name: Upload executable
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Windows executable
|
||||
path: upload
|
||||
|
@ -54,7 +54,7 @@ jobs:
|
|||
runs-on: macos-13
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -90,23 +90,22 @@ jobs:
|
|||
run: zip -r Alber Alber.app
|
||||
|
||||
- name: Upload MacOS App
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: MacOS Alber App Bundle
|
||||
path: 'Alber.zip'
|
||||
|
||||
Linux:
|
||||
runs-on: ubuntu-20.04
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
- name: Install misc packages
|
||||
run: |
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1-mesa-glx mesa-common-dev libfuse2 libwayland-dev
|
||||
sudo add-apt-repository -y ppa:savoury1/qt-6-2
|
||||
sudo apt-get update && sudo apt install libx11-dev libgl1 libglx-mesa0 mesa-common-dev libfuse2 libwayland-dev libgl1-mesa-dev
|
||||
sudo apt update
|
||||
sudo apt install qt6-base-dev qt6-base-private-dev
|
||||
|
||||
|
@ -117,11 +116,11 @@ jobs:
|
|||
sudo ./llvm.sh 17
|
||||
|
||||
- name: Setup Vulkan SDK
|
||||
run: |
|
||||
wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
||||
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
|
||||
sudo apt update
|
||||
sudo apt install vulkan-sdk
|
||||
uses: humbletim/setup-vulkan-sdk@v1.2.0
|
||||
with:
|
||||
vulkan-query-version: latest
|
||||
vulkan-use-cache: true
|
||||
vulkan-components: Vulkan-Headers, Vulkan-Loader, SPIRV-Tools, Glslang
|
||||
|
||||
- name: Configure CMake
|
||||
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DENABLE_USER_BUILD=ON -DENABLE_QT_GUI=ON
|
||||
|
@ -135,7 +134,7 @@ jobs:
|
|||
./.github/linux-appimage-qt.sh
|
||||
|
||||
- name: Upload executable
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Linux executable
|
||||
path: './Alber-x86_64.AppImage'
|
||||
|
|
4
.github/workflows/Windows_Build.yml
vendored
|
@ -19,7 +19,7 @@ jobs:
|
|||
runs-on: windows-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch submodules
|
||||
run: git submodule update --init --recursive
|
||||
|
||||
|
@ -40,7 +40,7 @@ jobs:
|
|||
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
|
||||
|
||||
- name: Upload executable
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Windows executable
|
||||
path: './build/${{ env.BUILD_TYPE }}/Alber.exe'
|
||||
|
|
4
.gitignore
vendored
|
@ -64,5 +64,9 @@ fb.bat
|
|||
*.elf
|
||||
*.smdh
|
||||
|
||||
# Compiled Metal shader files
|
||||
*.ir
|
||||
*.metallib
|
||||
|
||||
config.toml
|
||||
CMakeSettings.json
|
||||
|
|
130
.gitlab-ci.yml
Normal file
|
@ -0,0 +1,130 @@
|
|||
# DESCRIPTION: GitLab CI/CD for libRetro (NOT FOR GitLab-proper)
|
||||
|
||||
##############################################################################
|
||||
################################# BOILERPLATE ################################
|
||||
##############################################################################
|
||||
|
||||
# Core definitions
|
||||
.core-defs:
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
CORENAME: panda3ds
|
||||
CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF
|
||||
|
||||
# Inclusion templates, required for the build to work
|
||||
|
||||
include:
|
||||
################################## DESKTOPS ################################
|
||||
# Linux
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: '/linux-cmake.yml'
|
||||
|
||||
# Windows
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: '/windows-cmake-mingw.yml'
|
||||
|
||||
# MacOS
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: 'osx-cmake-x86.yml'
|
||||
|
||||
# MacOS
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: 'osx-cmake-arm64.yml'
|
||||
|
||||
################################## CELLULAR ################################
|
||||
# Android
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: '/android-cmake.yml'
|
||||
|
||||
# iOS
|
||||
- project: 'libretro-infrastructure/ci-templates'
|
||||
file: '/ios-cmake.yml'
|
||||
|
||||
# Stages for building
|
||||
stages:
|
||||
- build-prepare
|
||||
- build-static
|
||||
- build-shared
|
||||
|
||||
##############################################################################
|
||||
#################################### STAGES ##################################
|
||||
##############################################################################
|
||||
#
|
||||
################################### DESKTOPS #################################
|
||||
# Linux 64-bit
|
||||
libretro-build-linux-x64:
|
||||
image: $CI_SERVER_HOST:5050/libretro-infrastructure/libretro-build-amd64-ubuntu:latest
|
||||
before_script:
|
||||
- export NUMPROC=$(($(nproc)/5))
|
||||
- sudo apt-get update -qy
|
||||
- sudo apt-get install -qy software-properties-common
|
||||
- sudo add-apt-repository -y ppa:savoury1/build-tools
|
||||
- sudo add-apt-repository -y ppa:savoury1/gcc-defaults-12
|
||||
- sudo apt-get update -qy
|
||||
- sudo apt-get install -qy cmake gcc-12 g++-12
|
||||
variables:
|
||||
CC: /usr/bin/gcc-12
|
||||
CXX: /usr/bin/g++-12
|
||||
extends:
|
||||
- .libretro-linux-cmake-x86_64
|
||||
- .core-defs
|
||||
|
||||
# Windows 64-bit
|
||||
libretro-build-windows-x64:
|
||||
extends:
|
||||
- .libretro-windows-cmake-x86_64
|
||||
- .core-defs
|
||||
|
||||
# MacOS 64-bit
|
||||
libretro-build-osx-x64:
|
||||
tags:
|
||||
- mac-apple-silicon
|
||||
variables:
|
||||
CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DENABLE_USER_BUILD=ON -DENABLE_VULKAN=OFF -DENABLE_LUAJIT=OFF -DENABLE_DISCORD_RPC=OFF -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCRYPTOPP_AMD64=1
|
||||
extends:
|
||||
- .libretro-osx-cmake-x86
|
||||
- .core-defs
|
||||
|
||||
# MacOS arm 64-bit
|
||||
libretro-build-osx-arm64:
|
||||
tags:
|
||||
- mac-apple-silicon
|
||||
extends:
|
||||
- .libretro-osx-cmake-arm64
|
||||
- .core-defs
|
||||
|
||||
################################### CELLULAR #################################
|
||||
# Android ARMv7a
|
||||
#android-armeabi-v7a:
|
||||
# extends:
|
||||
# - .libretro-android-cmake-armeabi-v7a
|
||||
# - .core-defs
|
||||
|
||||
# Android ARMv8a
|
||||
# android-arm64-v8a:
|
||||
# extends:
|
||||
# - .libretro-android-cmake-arm64-v8a
|
||||
# - .core-defs
|
||||
|
||||
# Android 64-bit x86
|
||||
# android-x86_64:
|
||||
# extends:
|
||||
# - .libretro-android-cmake-x86_64
|
||||
# - .core-defs
|
||||
|
||||
# Android 32-bit x86
|
||||
# android-x86:
|
||||
# extends:
|
||||
# - .libretro-android-cmake-x86
|
||||
# - .core-defs
|
||||
|
||||
# iOS
|
||||
# libretro-build-ios-arm64:
|
||||
# extends:
|
||||
# - .libretro-ios-cmake-arm64
|
||||
# - .core-defs
|
||||
# variables:
|
||||
# CORE_ARGS: -DBUILD_LIBRETRO_CORE=ON -DBUILD_PLAY=OFF -DENABLE_AMAZON_S3=off -DBUILD_TESTS=OFF -DCMAKE_TOOLCHAIN_FILE=deps/Dependencies/cmake-ios/ios.cmake -DTARGET_IOS=ON
|
||||
# LIBNAME: ${CORENAME}_libretro_ios.dylib
|
||||
|
||||
################################### CONSOLES #################################
|
12
.gitmodules
vendored
|
@ -70,3 +70,15 @@
|
|||
[submodule "third_party/capstone"]
|
||||
path = third_party/capstone
|
||||
url = https://github.com/capstone-engine/capstone
|
||||
[submodule "third_party/hips"]
|
||||
path = third_party/hips
|
||||
url = https://github.com/wheremyfoodat/Hips
|
||||
[submodule "third_party/metal-cpp"]
|
||||
path = third_party/metal-cpp
|
||||
url = https://github.com/Panda3DS-emu/metal-cpp
|
||||
[submodule "third_party/fmt"]
|
||||
path = third_party/fmt
|
||||
url = https://github.com/fmtlib/fmt
|
||||
[submodule "third_party/fdk-aac"]
|
||||
path = third_party/fdk-aac
|
||||
url = https://github.com/Panda3DS-emu/fdk-aac/
|
||||
|
|
242
CMakeLists.txt
|
@ -19,19 +19,37 @@ endif()
|
|||
|
||||
project(Alber)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
|
||||
|
||||
if(APPLE)
|
||||
enable_language(OBJC)
|
||||
endif()
|
||||
|
||||
# Enable RC support in order to use resource files for application icons
|
||||
if(WIN32)
|
||||
enable_language(RC)
|
||||
set(APP_RESOURCES docs/img/windows_icon.rc)
|
||||
endif()
|
||||
|
||||
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security -Wno-invalid-offsetof")
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-interference-size")
|
||||
endif()
|
||||
|
||||
if(ANDROID)
|
||||
set(DEFAULT_OPENGL_PROFILE OpenGLES)
|
||||
else()
|
||||
set(DEFAULT_OPENGL_PROFILE OpenGL)
|
||||
endif()
|
||||
|
||||
option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" ON)
|
||||
option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
|
||||
option(ENABLE_OPENGL "Enable OpenGL rendering backend" ON)
|
||||
option(ENABLE_VULKAN "Enable Vulkan rendering backend" ON)
|
||||
option(ENABLE_METAL "Enable Metal rendering backend (if available)" ON)
|
||||
option(ENABLE_LTO "Enable link-time optimization" OFF)
|
||||
option(ENABLE_TESTS "Compile unit-tests" OFF)
|
||||
option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)
|
||||
|
@ -39,12 +57,65 @@ option(ENABLE_HTTP_SERVER "Enable HTTP server. Used for Discord bot support" OFF
|
|||
option(ENABLE_DISCORD_RPC "Compile with Discord RPC support (disabled by default)" ON)
|
||||
option(ENABLE_LUAJIT "Enable scripting with the Lua programming language" ON)
|
||||
option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
|
||||
option(ENABLE_GIT_VERSIONING "Enables querying git for the emulator version" ON)
|
||||
option(BUILD_HYDRA_CORE "Build a Hydra core" OFF)
|
||||
option(BUILD_LIBRETRO_CORE "Build a Libretro core" OFF)
|
||||
option(ENABLE_RENDERDOC_API "Build with support for Renderdoc's capture API for graphics debugging" ON)
|
||||
option(DISABLE_SSE4 "Build with SSE4 instructions disabled, may reduce performance" OFF)
|
||||
|
||||
set(OPENGL_PROFILE ${DEFAULT_OPENGL_PROFILE} CACHE STRING "OpenGL profile to use if OpenGL is enabled. Valid values are 'OpenGL' and 'OpenGLES'.")
|
||||
set_property(CACHE OPENGL_PROFILE PROPERTY STRINGS OpenGL OpenGLES)
|
||||
|
||||
if(ENABLE_OPENGL AND (OPENGL_PROFILE STREQUAL "OpenGLES"))
|
||||
message(STATUS "Building with OpenGLES support")
|
||||
add_compile_definitions(USING_GLES)
|
||||
endif()
|
||||
|
||||
if(BUILD_HYDRA_CORE)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
if(BUILD_LIBRETRO_CORE)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
add_compile_definitions(__LIBRETRO__)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND ENABLE_USER_BUILD)
|
||||
# Disable stack buffer overflow checks in user builds
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS-")
|
||||
endif()
|
||||
|
||||
# Generate versioning files
|
||||
find_package(Git)
|
||||
set(PANDA3DS_VERSION "0.8")
|
||||
|
||||
if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/version.hpp.in)
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/include/version.hpp.in "#define PANDA3DS_VERSION \"\${PANDA3DS_VERSION}\"")
|
||||
endif()
|
||||
|
||||
if(GIT_FOUND AND ENABLE_GIT_VERSIONING)
|
||||
execute_process(
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0
|
||||
OUTPUT_VARIABLE PANDA3DS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
execute_process(
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --tags
|
||||
OUTPUT_VARIABLE git_version_tag OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
if(NOT PANDA3DS_VERSION STREQUAL git_version_tag)
|
||||
execute_process(
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} COMMAND ${GIT_EXECUTABLE} describe --always --abbrev=7
|
||||
OUTPUT_VARIABLE git_version_rev OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
set(PANDA3DS_VERSION "${PANDA3DS_VERSION}.${git_version_rev}")
|
||||
unset(git_version_rev)
|
||||
endif()
|
||||
string(REGEX REPLACE "^v" "" PANDA3DS_VERSION "${PANDA3DS_VERSION}")
|
||||
unset(git_version_tag)
|
||||
endif()
|
||||
configure_file(${CMAKE_BINARY_DIR}/include/version.hpp.in ${CMAKE_BINARY_DIR}/include/version.hpp)
|
||||
include_directories(${CMAKE_BINARY_DIR}/include/)
|
||||
|
||||
add_library(AlberCore STATIC)
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include/)
|
||||
|
@ -52,6 +123,7 @@ include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
|
|||
include_directories(${FMT_INCLUDE_DIR})
|
||||
include_directories(third_party/boost/)
|
||||
include_directories(third_party/elfio/)
|
||||
include_directories(third_party/hips/include/)
|
||||
include_directories(third_party/imgui/)
|
||||
include_directories(third_party/dynarmic/src)
|
||||
include_directories(third_party/cryptopp/)
|
||||
|
@ -82,10 +154,13 @@ if (NOT ANDROID)
|
|||
target_link_libraries(AlberCore PUBLIC SDL2-static)
|
||||
endif()
|
||||
|
||||
add_subdirectory(third_party/fmt)
|
||||
add_subdirectory(third_party/toml11)
|
||||
include_directories(${SDL2_INCLUDE_DIR})
|
||||
include_directories(third_party/toml11)
|
||||
include_directories(third_party/glm)
|
||||
include_directories(third_party/renderdoc)
|
||||
include_directories(third_party/duckstation)
|
||||
|
||||
add_subdirectory(third_party/cmrc)
|
||||
|
||||
|
@ -144,6 +219,18 @@ else()
|
|||
set(HOST_ARM64 FALSE)
|
||||
endif()
|
||||
|
||||
# Enable SSE4.1 if it's not explicitly disabled
|
||||
# Annoyingly, we can't easily do this if we're using MSVC cause there's no SSE4.1 flag, only SSE4.1
|
||||
if(NOT MSVC OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT DISABLE_SSE4 AND HOST_X64)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
|
||||
endif()
|
||||
|
||||
if(ENABLE_RENDERDOC_API)
|
||||
find_package(RenderDoc 1.6.0 MODULE REQUIRED)
|
||||
add_compile_definitions(PANDA3DS_ENABLE_RENDERDOC)
|
||||
endif()
|
||||
|
||||
if(HOST_X64 OR HOST_ARM64)
|
||||
set(DYNARMIC_TESTS OFF)
|
||||
#set(DYNARMIC_NO_BUNDLED_FMT ON)
|
||||
|
@ -155,6 +242,7 @@ else()
|
|||
endif()
|
||||
|
||||
add_subdirectory(third_party/teakra EXCLUDE_FROM_ALL)
|
||||
add_subdirectory(third_party/fdk-aac)
|
||||
|
||||
set(CAPSTONE_ARCHITECTURE_DEFAULT OFF)
|
||||
set(CAPSTONE_ARM_SUPPORT ON)
|
||||
|
@ -166,7 +254,7 @@ set(SOURCE_FILES src/emulator.cpp src/io_file.cpp src/config.cpp
|
|||
src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp
|
||||
src/core/memory.cpp src/renderer.cpp src/core/renderer_null/renderer_null.cpp
|
||||
src/http_server.cpp src/stb_image_write.c src/core/cheats.cpp src/core/action_replay.cpp
|
||||
src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp
|
||||
src/discord_rpc.cpp src/lua.cpp src/memory_mapped_file.cpp src/miniaudio.cpp src/renderdoc.cpp
|
||||
)
|
||||
set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp)
|
||||
set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp
|
||||
|
@ -187,12 +275,13 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services
|
|||
src/core/services/act.cpp src/core/services/nfc.cpp src/core/services/dlp_srvr.cpp
|
||||
src/core/services/ir_user.cpp src/core/services/http.cpp src/core/services/soc.cpp
|
||||
src/core/services/ssl.cpp src/core/services/news_u.cpp src/core/services/amiibo_device.cpp
|
||||
src/core/services/csnd.cpp src/core/services/nwm_uds.cpp
|
||||
src/core/services/csnd.cpp src/core/services/nwm_uds.cpp src/core/services/fonts.cpp
|
||||
)
|
||||
set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp
|
||||
src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp
|
||||
src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp
|
||||
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp
|
||||
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp
|
||||
src/core/PICA/shader_decompiler.cpp src/core/PICA/draw_acceleration.cpp
|
||||
)
|
||||
|
||||
set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp)
|
||||
|
@ -205,7 +294,7 @@ set(APPLET_SOURCE_FILES src/core/applets/applet.cpp src/core/applets/mii_selecto
|
|||
src/core/applets/error_applet.cpp
|
||||
)
|
||||
set(AUDIO_SOURCE_FILES src/core/audio/dsp_core.cpp src/core/audio/null_core.cpp src/core/audio/teakra_core.cpp
|
||||
src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp
|
||||
src/core/audio/miniaudio_device.cpp src/core/audio/hle_core.cpp src/core/audio/aac_decoder.cpp
|
||||
)
|
||||
set(RENDERER_SW_SOURCE_FILES src/core/renderer_sw/renderer_sw.cpp)
|
||||
|
||||
|
@ -224,7 +313,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
|||
include/services/mic.hpp include/services/cecd.hpp include/services/ac.hpp
|
||||
include/services/am.hpp include/services/boss.hpp include/services/frd.hpp include/services/nim.hpp
|
||||
include/fs/archive_ext_save_data.hpp include/fs/archive_ncch.hpp include/services/mcu/mcu_hwc.hpp
|
||||
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
|
||||
include/colour.hpp include/services/y2r.hpp include/services/cam.hpp include/services/ssl.hpp
|
||||
include/services/ldr_ro.hpp include/ipc.hpp include/services/act.hpp include/services/nfc.hpp
|
||||
include/system_models.hpp include/services/dlp_srvr.hpp include/PICA/dynapica/pica_recs.hpp
|
||||
include/PICA/dynapica/x64_regs.hpp include/PICA/dynapica/vertex_loader_rec.hpp include/PICA/dynapica/shader_rec.hpp
|
||||
|
@ -235,21 +324,24 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
|||
include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp
|
||||
include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp
|
||||
include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp
|
||||
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
|
||||
include/applets/applet.hpp include/applets/mii_selector.hpp include/math_util.hpp include/services/soc.hpp
|
||||
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
|
||||
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
|
||||
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
|
||||
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp
|
||||
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp
|
||||
include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp
|
||||
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
|
||||
include/audio/hle_core.hpp include/capstone.hpp
|
||||
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
|
||||
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
|
||||
include/PICA/pica_vert_config.hpp include/sdl_sensors.hpp include/PICA/draw_acceleration.hpp include/renderdoc.hpp
|
||||
include/align.hpp include/audio/aac_decoder.hpp include/PICA/pica_simd.hpp include/services/fonts.hpp
|
||||
)
|
||||
|
||||
cmrc_add_resource_library(
|
||||
resources_console_fonts
|
||||
NAMESPACE ConsoleFonts
|
||||
WHENCE "src/core/services/fonts/"
|
||||
"src/core/services/fonts/CitraSharedFontUSRelocated.bin"
|
||||
"src/core/services/fonts/SharedFontReplacement.bin"
|
||||
)
|
||||
|
||||
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
|
||||
|
@ -275,7 +367,6 @@ if(ENABLE_LUAJIT AND NOT ANDROID)
|
|||
endif()
|
||||
|
||||
if(ENABLE_QT_GUI)
|
||||
include_directories(third_party/duckstation)
|
||||
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/window_info.cpp third_party/duckstation/gl/context.cpp)
|
||||
|
||||
if(APPLE)
|
||||
|
@ -308,7 +399,7 @@ if(ENABLE_OPENGL)
|
|||
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
|
||||
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
|
||||
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
|
||||
include/renderer_gl/gl_state.hpp
|
||||
include/renderer_gl/gl_state.hpp include/renderer_gl/gl_driver.hpp
|
||||
)
|
||||
|
||||
set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp
|
||||
|
@ -318,6 +409,8 @@ if(ENABLE_OPENGL)
|
|||
src/host_shaders/opengl_fragment_shader.frag
|
||||
)
|
||||
|
||||
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/stream_buffer.cpp)
|
||||
|
||||
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES})
|
||||
source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES})
|
||||
|
||||
|
@ -338,8 +431,8 @@ endif()
|
|||
|
||||
if(ENABLE_VULKAN)
|
||||
find_package(
|
||||
Vulkan 1.3.206 REQUIRED
|
||||
COMPONENTS glslangValidator
|
||||
Vulkan REQUIRED
|
||||
COMPONENTS glslang
|
||||
)
|
||||
|
||||
set(RENDERER_VK_INCLUDE_FILES include/renderer_vk/renderer_vk.hpp
|
||||
|
@ -382,7 +475,7 @@ if(ENABLE_VULKAN)
|
|||
add_custom_command(
|
||||
OUTPUT ${HOST_SHADER_SPIRV}
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_BINARY_DIR}/host_shaders/"
|
||||
COMMAND Vulkan::glslangValidator ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV}
|
||||
COMMAND glslang ${RENDERER_VK_HOST_SHADERS_FLAGS} -V "${PROJECT_SOURCE_DIR}/${HOST_SHADER_SOURCE}" -o ${HOST_SHADER_SPIRV}
|
||||
DEPENDS ${HOST_SHADER_SOURCE}
|
||||
)
|
||||
list( APPEND RENDERER_VK_HOST_SHADERS_SPIRV ${HOST_SHADER_SPIRV} )
|
||||
|
@ -400,14 +493,88 @@ if(ENABLE_VULKAN)
|
|||
target_link_libraries(AlberCore PRIVATE Vulkan::Vulkan resources_renderer_vk)
|
||||
endif()
|
||||
|
||||
if(ENABLE_METAL AND APPLE)
|
||||
set(RENDERER_MTL_INCLUDE_FILES include/renderer_mtl/renderer_mtl.hpp
|
||||
include/renderer_mtl/mtl_depth_stencil_cache.hpp
|
||||
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
|
||||
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
|
||||
include/renderer_mtl/mtl_render_target.hpp
|
||||
include/renderer_mtl/mtl_texture.hpp
|
||||
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
|
||||
include/renderer_mtl/mtl_lut_texture.hpp
|
||||
include/renderer_mtl/mtl_command_encoder.hpp
|
||||
include/renderer_mtl/mtl_common.hpp
|
||||
include/renderer_mtl/pica_to_mtl.hpp
|
||||
include/renderer_mtl/objc_helper.hpp
|
||||
)
|
||||
|
||||
set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
|
||||
src/core/renderer_mtl/renderer_mtl.cpp
|
||||
src/core/renderer_mtl/mtl_texture.cpp
|
||||
src/core/renderer_mtl/mtl_etc1.cpp
|
||||
src/core/renderer_mtl/mtl_lut_texture.cpp
|
||||
src/core/renderer_mtl/objc_helper.mm
|
||||
src/host_shaders/metal_shaders.metal
|
||||
src/host_shaders/metal_blit.metal
|
||||
#src/host_shaders/metal_copy_to_lut_texture.metal
|
||||
)
|
||||
|
||||
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_MTL_INCLUDE_FILES})
|
||||
source_group("Source Files\\Core\\Metal Renderer" FILES ${RENDERER_MTL_SOURCE_FILES})
|
||||
|
||||
set(RENDERER_MTL_HOST_SHADERS_SOURCES)
|
||||
function (add_metal_shader SHADER)
|
||||
set(SHADER_SOURCE "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metal")
|
||||
set(SHADER_IR "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.ir")
|
||||
set(SHADER_METALLIB "${CMAKE_SOURCE_DIR}/src/host_shaders/${SHADER}.metallib")
|
||||
# TODO: only include sources in debug builds
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_IR}
|
||||
COMMAND xcrun -sdk macosx metal -gline-tables-only -frecord-sources -o ${SHADER_IR} -c ${SHADER_SOURCE}
|
||||
DEPENDS ${SHADER_SOURCE}
|
||||
VERBATIM)
|
||||
add_custom_command(
|
||||
OUTPUT ${SHADER_METALLIB}
|
||||
COMMAND xcrun -sdk macosx metallib -o ${SHADER_METALLIB} ${SHADER_IR}
|
||||
DEPENDS ${SHADER_IR}
|
||||
VERBATIM)
|
||||
set(RENDERER_MTL_HOST_SHADERS_SOURCES ${RENDERER_MTL_HOST_SHADERS_SOURCES} ${SHADER_METALLIB})
|
||||
endfunction()
|
||||
|
||||
add_metal_shader(metal_shaders)
|
||||
add_metal_shader(metal_blit)
|
||||
#add_metal_shader(metal_copy_to_lut_texture)
|
||||
|
||||
add_custom_target(
|
||||
compile_msl_shaders
|
||||
DEPENDS ${RENDERER_MTL_HOST_SHADERS_SOURCES}
|
||||
)
|
||||
|
||||
cmrc_add_resource_library(
|
||||
resources_renderer_mtl
|
||||
NAMESPACE RendererMTL
|
||||
WHENCE "src/host_shaders/"
|
||||
"src/host_shaders/metal_shaders.metallib"
|
||||
"src/host_shaders/metal_blit.metallib"
|
||||
#"src/host_shaders/metal_copy_to_lut_texture.metallib"
|
||||
)
|
||||
add_dependencies(resources_renderer_mtl compile_msl_shaders)
|
||||
|
||||
target_sources(AlberCore PRIVATE ${RENDERER_MTL_SOURCE_FILES})
|
||||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_METAL=1")
|
||||
target_include_directories(AlberCore PRIVATE third_party/metal-cpp)
|
||||
# TODO: check if all of them are needed
|
||||
target_link_libraries(AlberCore PRIVATE "-framework Metal" "-framework Foundation" "-framework QuartzCore" resources_renderer_mtl)
|
||||
endif()
|
||||
|
||||
source_group("Header Files\\Core" FILES ${HEADER_FILES})
|
||||
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
|
||||
set(ALL_SOURCES ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES}
|
||||
${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES} ${APPLET_SOURCE_FILES} ${RENDERER_SW_SOURCE_FILES} ${PICA_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES}
|
||||
${AUDIO_SOURCE_FILES} ${HEADER_FILES} ${FRONTEND_HEADER_FILES})
|
||||
target_sources(AlberCore PRIVATE ${ALL_SOURCES})
|
||||
|
||||
target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra)
|
||||
target_link_libraries(AlberCore PUBLIC glad capstone)
|
||||
target_link_libraries(AlberCore PRIVATE dynarmic cryptopp glad resources_console_fonts teakra fdk-aac)
|
||||
target_link_libraries(AlberCore PUBLIC glad capstone fmt::fmt)
|
||||
|
||||
if(ENABLE_DISCORD_RPC AND NOT ANDROID)
|
||||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_ENABLE_DISCORD_RPC=1")
|
||||
|
@ -438,7 +605,7 @@ else()
|
|||
target_compile_definitions(AlberCore PUBLIC "PANDA3DS_FRONTEND_SDL=1")
|
||||
endif()
|
||||
|
||||
if(NOT BUILD_HYDRA_CORE)
|
||||
if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE)
|
||||
add_executable(Alber)
|
||||
|
||||
if(ENABLE_QT_GUI)
|
||||
|
@ -447,11 +614,16 @@ if(NOT BUILD_HYDRA_CORE)
|
|||
message(FATAL_ERROR "Qt frontend requires OpenGL")
|
||||
endif()
|
||||
|
||||
option(GENERATE_QT_TRANSLATION "Generate Qt translation file" OFF)
|
||||
set(QT_LANGUAGES docs/translations)
|
||||
|
||||
set(FRONTEND_SOURCE_FILES src/panda_qt/main.cpp src/panda_qt/screen.cpp src/panda_qt/main_window.cpp src/panda_qt/about_window.cpp
|
||||
src/panda_qt/config_window.cpp src/panda_qt/zep.cpp src/panda_qt/text_editor.cpp src/panda_qt/cheats_window.cpp src/panda_qt/mappings.cpp
|
||||
)
|
||||
src/panda_qt/patch_window.cpp src/panda_qt/elided_label.cpp src/panda_qt/shader_editor.cpp
|
||||
)
|
||||
set(FRONTEND_HEADER_FILES include/panda_qt/screen.hpp include/panda_qt/main_window.hpp include/panda_qt/about_window.hpp
|
||||
include/panda_qt/config_window.hpp include/panda_qt/text_editor.hpp include/panda_qt/cheats_window.hpp
|
||||
include/panda_qt/patch_window.hpp include/panda_qt/elided_label.hpp include/panda_qt/shader_editor.hpp
|
||||
)
|
||||
|
||||
source_group("Source Files\\Qt" FILES ${FRONTEND_SOURCE_FILES})
|
||||
|
@ -481,27 +653,47 @@ if(NOT BUILD_HYDRA_CORE)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# Generates an en.ts file for translations
|
||||
# To update the file, use cmake --build --target Alber_lupdate
|
||||
if(GENERATE_QT_TRANSLATION)
|
||||
find_package(Qt6 REQUIRED COMPONENTS LinguistTools)
|
||||
qt_add_lupdate(Alber TS_FILES ${QT_LANGUAGES}/en.ts
|
||||
SOURCES ${FRONTEND_SOURCE_FILES}
|
||||
INCLUDE_DIRECTORIES ${FRONTEND_HEADER_FILES}
|
||||
NO_GLOBAL_TARGET
|
||||
)
|
||||
endif()
|
||||
|
||||
qt_add_resources(AlberCore "app_images"
|
||||
PREFIX "/"
|
||||
FILES
|
||||
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png
|
||||
docs/img/rsob_icon.png docs/img/rstarstruck_icon.png docs/img/rpog_icon.png docs/img/rsyn_icon.png
|
||||
)
|
||||
else()
|
||||
set(FRONTEND_SOURCE_FILES src/panda_sdl/main.cpp src/panda_sdl/frontend_sdl.cpp src/panda_sdl/mappings.cpp)
|
||||
set(FRONTEND_HEADER_FILES "")
|
||||
set(FRONTEND_HEADER_FILES "include/panda_sdl/frontend_sdl.hpp")
|
||||
endif()
|
||||
|
||||
target_link_libraries(Alber PRIVATE AlberCore)
|
||||
target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES})
|
||||
target_sources(Alber PRIVATE ${FRONTEND_SOURCE_FILES} ${FRONTEND_HEADER_FILES} ${APP_RESOURCES})
|
||||
elseif(BUILD_HYDRA_CORE)
|
||||
target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1)
|
||||
include_directories(third_party/hydra_core/include)
|
||||
add_library(Alber SHARED src/hydra_core.cpp)
|
||||
target_link_libraries(Alber PUBLIC AlberCore)
|
||||
elseif(BUILD_LIBRETRO_CORE)
|
||||
include_directories(third_party/libretro/include)
|
||||
add_library(panda3ds_libretro SHARED src/libretro_core.cpp)
|
||||
target_link_libraries(panda3ds_libretro PUBLIC AlberCore)
|
||||
set_target_properties(panda3ds_libretro PROPERTIES PREFIX "")
|
||||
endif()
|
||||
|
||||
if(ENABLE_LTO OR ENABLE_USER_BUILD)
|
||||
set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
if (NOT BUILD_LIBRETRO_CORE)
|
||||
set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
else()
|
||||
set_target_properties(panda3ds_libretro PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_TESTS)
|
||||
|
|
25
cmake/FindRenderDoc.cmake
Normal file
|
@ -0,0 +1,25 @@
|
|||
# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
set(RENDERDOC_INCLUDE_DIR third_party/renderdoc)
|
||||
|
||||
if (RENDERDOC_INCLUDE_DIR AND EXISTS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h")
|
||||
file(STRINGS "${RENDERDOC_INCLUDE_DIR}/renderdoc_app.h" RENDERDOC_VERSION_LINE REGEX "typedef struct RENDERDOC_API")
|
||||
string(REGEX REPLACE ".*typedef struct RENDERDOC_API_([0-9]+)_([0-9]+)_([0-9]+).*" "\\1.\\2.\\3" RENDERDOC_VERSION "${RENDERDOC_VERSION_LINE}")
|
||||
unset(RENDERDOC_VERSION_LINE)
|
||||
endif()
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(RenderDoc
|
||||
REQUIRED_VARS RENDERDOC_INCLUDE_DIR
|
||||
VERSION_VAR RENDERDOC_VERSION
|
||||
)
|
||||
|
||||
if (RenderDoc_FOUND AND NOT TARGET RenderDoc::API)
|
||||
add_library(RenderDoc::API INTERFACE IMPORTED)
|
||||
set_target_properties(RenderDoc::API PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "${RENDERDOC_INCLUDE_DIR}"
|
||||
)
|
||||
endif()
|
||||
|
||||
mark_as_advanced(RENDERDOC_INCLUDE_DIR)
|
BIN
docs/3ds/accelerometer_readings/readings_flat_1.png
Normal file
After Width: | Height: | Size: 148 KiB |
BIN
docs/3ds/accelerometer_readings/readings_flat_2.png
Normal file
After Width: | Height: | Size: 54 KiB |
BIN
docs/3ds/accelerometer_readings/readings_shaking_1.png
Normal file
After Width: | Height: | Size: 212 KiB |
BIN
docs/3ds/accelerometer_readings/readings_shaking_2.png
Normal file
After Width: | Height: | Size: 65 KiB |
79
docs/3ds/lighting.md
Normal file
|
@ -0,0 +1,79 @@
|
|||
## Info on the lighting implementation
|
||||
|
||||
### Missing shadow attenuation
|
||||
Shadow attenuation samples a texture unit, and that likely needs render to texture for most games so that they can construct
|
||||
their shadow map. As such the colors are not multiplied by the shadow attenuation value, so there's no shadows.
|
||||
|
||||
### Missing bump mapping
|
||||
Bump mapping also samples a texture unit, most likely doesn't need render to texture however may need better texture sampling
|
||||
implementation (such as GPUREG_TEXUNITi_BORDER_COLOR, GPUREG_TEXUNITi_BORDER_PARAM). Bump mapping would work for some things,
|
||||
namely the 3ds-examples bump mapping demo, but would break others such as Toad Treasure Tracker with a naive `texture` implementation.
|
||||
|
||||
Also the CP configuration is missing, because it needs a tangent map implementation. It is currently marked with error_unimpl.
|
||||
|
||||
### samplerEnabledBitfields
|
||||
Holds the enabled state of the lighting samples for various PICA configurations
|
||||
As explained in https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTING_CONFIG0
|
||||
|
||||
```c
|
||||
const bool samplerEnabled[9 * 7] = bool[9 * 7](
|
||||
// D0 D1 SP FR RB RG RR
|
||||
true, false, true, false, false, false, true, // Configuration 0: D0, SP, RR
|
||||
false, false, true, true, false, false, true, // Configuration 1: FR, SP, RR
|
||||
true, true, false, false, false, false, true, // Configuration 2: D0, D1, RR
|
||||
true, true, false, true, false, false, false, // Configuration 3: D0, D1, FR
|
||||
true, true, true, false, true, true, true, // Configuration 4: All except for FR
|
||||
true, false, true, true, true, true, true, // Configuration 5: All except for D1
|
||||
true, true, true, true, false, false, true, // Configuration 6: All except for RB and RG
|
||||
false, false, false, false, false, false, false, // Configuration 7: Unused
|
||||
true, true, true, true, true, true, true // Configuration 8: All
|
||||
);
|
||||
```
|
||||
|
||||
The above has been condensed to two uints for performance reasons.
|
||||
You can confirm they are the same by running the following:
|
||||
```c
|
||||
const uint samplerEnabledBitfields[2] = { 0x7170e645u, 0x7f013fefu };
|
||||
for (int i = 0; i < 9 * 7; i++) {
|
||||
unsigned arrayIndex = (i >> 5);
|
||||
bool b = (samplerEnabledBitfields[arrayIndex] & (1u << (i & 31))) != 0u;
|
||||
if (samplerEnabled[i] == b) {
|
||||
printf("%d: happy\n", i);
|
||||
} else {
|
||||
printf("%d: unhappy\n", i);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### lightLutLookup
|
||||
lut_id is one of these values
|
||||
0 D0
|
||||
1 D1
|
||||
2 SP
|
||||
3 FR
|
||||
4 RB
|
||||
5 RG
|
||||
6 RR
|
||||
|
||||
lut_index on the other hand represents the actual index of the LUT in the texture
|
||||
u_tex_luts has 24 LUTs for lighting and they are used like so:
|
||||
0 D0
|
||||
1 D1
|
||||
2 is missing because SP uses LUTs 8-15
|
||||
3 FR
|
||||
4 RB
|
||||
5 RG
|
||||
6 RR
|
||||
8-15 SP0-7
|
||||
16-23 DA0-7, but this is not handled in this function as the lookup is a bit different
|
||||
|
||||
The light environment configuration controls which LUTs are available for use
|
||||
If a LUT is not available in the selected configuration, its value will always read a constant 1.0 regardless of the enable state in GPUREG_LIGHTING_CONFIG1
|
||||
If RR is enabled but not RG or RB, the output of RR is used for the three components; Red, Green and Blue.
|
||||
|
||||
### Distance attenuation
|
||||
Distance attenuation is computed differently from the other factors, for example
|
||||
it doesn't store its scale in GPUREG_LIGHTING_LUTINPUT_SCALE and it doesn't use
|
||||
GPUREG_LIGHTING_LUTINPUT_SELECT. Instead, it uses the distance from the light to the
|
||||
fragment and the distance attenuation scale and bias to calculate where in the LUT to look up.
|
||||
See: https://www.3dbrew.org/wiki/GPU/Internal_Registers#GPUREG_LIGHTi_ATTENUATION_SCALE
|
BIN
docs/img/KirbyAndroid.png
Normal file
After Width: | Height: | Size: 567 KiB |
Before Width: | Height: | Size: 2 MiB After Width: | Height: | Size: 2 MiB |
BIN
docs/img/rpog_icon.png
Normal file
After Width: | Height: | Size: 26 KiB |
BIN
docs/img/rsyn_icon.png
Normal file
After Width: | Height: | Size: 30 KiB |
BIN
docs/img/windows_alt_icon.ico
Normal file
After Width: | Height: | Size: 54 KiB |
BIN
docs/img/windows_icon.ico
Normal file
After Width: | Height: | Size: 54 KiB |
1
docs/img/windows_icon.rc
Normal file
|
@ -0,0 +1 @@
|
|||
AlberIcon ICON "windows_icon.ico"
|
34
docs/libretro/panda3ds_libretro.info
Normal file
|
@ -0,0 +1,34 @@
|
|||
# Software Information
|
||||
display_name = "Nintendo - 3DS (Panda3DS)"
|
||||
authors = "Panda3DS Authors (tm)"
|
||||
supported_extensions = "3ds|3dsx|elf|axf|cci|cxi|app"
|
||||
corename = "Panda3DS"
|
||||
categories = "Emulator"
|
||||
license = "GPLv3"
|
||||
permissions = ""
|
||||
display_version = "Git"
|
||||
|
||||
# Hardware Information
|
||||
manufacturer = "Nintendo"
|
||||
systemname = "3DS"
|
||||
systemid = "3ds"
|
||||
|
||||
# Libretro Information
|
||||
database = "Nintendo - Nintendo 3DS"
|
||||
supports_no_game = "false"
|
||||
savestate = "true"
|
||||
savestate_features = "basic"
|
||||
cheats = "false"
|
||||
input_descriptors = "true"
|
||||
memory_descriptors = "false"
|
||||
libretro_saves = "true"
|
||||
core_options = "true"
|
||||
core_options_version = "1.0"
|
||||
load_subsystem = "false"
|
||||
hw_render = "true"
|
||||
required_hw_api = "OpenGL Core >= 4.1"
|
||||
needs_fullpath = "true"
|
||||
disk_control = "false"
|
||||
is_experimental = "true"
|
||||
|
||||
description = "Panda3DS !"
|
45
include/PICA/draw_acceleration.hpp
Normal file
|
@ -0,0 +1,45 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct DrawAcceleration {
|
||||
static constexpr u32 maxAttribCount = 16;
|
||||
static constexpr u32 maxLoaderCount = 12;
|
||||
|
||||
struct AttributeInfo {
|
||||
u32 offset;
|
||||
u32 stride;
|
||||
|
||||
u8 type;
|
||||
u8 componentCount;
|
||||
|
||||
std::array<float, 4> fixedValue; // For fixed attributes
|
||||
};
|
||||
|
||||
struct Loader {
|
||||
// Data to upload for this loader
|
||||
u8* data;
|
||||
usize size;
|
||||
};
|
||||
|
||||
u8* indexBuffer;
|
||||
|
||||
// Minimum and maximum index in the index buffer for a draw call
|
||||
u16 minimumIndex, maximumIndex;
|
||||
u32 totalAttribCount;
|
||||
u32 totalLoaderCount;
|
||||
u32 enabledAttributeMask;
|
||||
u32 fixedAttributes;
|
||||
u32 vertexDataSize;
|
||||
|
||||
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
||||
std::array<Loader, maxLoaderCount> loaders;
|
||||
|
||||
bool canBeAccelerated;
|
||||
bool indexed;
|
||||
bool useShortIndices;
|
||||
};
|
||||
} // namespace PICA
|
|
@ -2,7 +2,7 @@
|
|||
#include "helpers.hpp"
|
||||
#include "vertex_loader_rec.hpp"
|
||||
|
||||
// Common file for our PICA JITs (From vertex config -> CPU assembly and from PICA shader -> CPU assembly)
|
||||
// Common file for our PICA JITs (From PICA shader -> CPU assembly)
|
||||
|
||||
namespace Dynapica {
|
||||
#ifdef PANDA3DS_DYNAPICA_SUPPORTED
|
||||
|
|
|
@ -22,8 +22,11 @@ class ShaderJIT {
|
|||
|
||||
ShaderCache cache;
|
||||
#endif
|
||||
bool accurateMul = false;
|
||||
|
||||
public:
|
||||
void setAccurateMul(bool value) { accurateMul = value; }
|
||||
|
||||
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
|
||||
// Call this before starting to process a batch of vertices
|
||||
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
|
||||
|
@ -36,11 +39,11 @@ class ShaderJIT {
|
|||
static constexpr bool isAvailable() { return true; }
|
||||
#else
|
||||
void prepare(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Prepare on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
void run(PICAShader& shaderUnit) {
|
||||
Helpers::panic("Vertex Loader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
Helpers::panic("Shader JIT: Tried to run ShaderJIT::Run on platform that does not support shader jit");
|
||||
}
|
||||
|
||||
// Define dummy callback. This should never be called if the shader JIT is not supported
|
||||
|
|
|
@ -37,6 +37,8 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
oaknut::Label log2Func, exp2Func;
|
||||
oaknut::Label emitLog2Func();
|
||||
|
@ -123,7 +125,7 @@ class ShaderEmitter : private oaknut::CodeBlock, public oaknut::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of memory allocated for the code buffer
|
||||
ShaderEmitter() : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {}
|
||||
ShaderEmitter(bool useSafeMUL) : oaknut::CodeBlock(allocSize), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()), useSafeMUL(useSafeMUL) {}
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
InstructionCallback getInstructionCallback(u32 pc) { return getLabelPointer<InstructionCallback>(instructionLabels.at(pc)); }
|
||||
|
|
|
@ -32,6 +32,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
Label negateVector;
|
||||
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
|
||||
Label onesVector;
|
||||
// Vector value of (0xFF, 0xFF, 0xFF, 0) for setting the w component to 0 in DP3
|
||||
Label dp3Vector;
|
||||
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
|
@ -43,12 +45,17 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Shows whether the loaded shader has any log2 and exp2 instructions
|
||||
bool codeHasLog2 = false;
|
||||
bool codeHasExp2 = false;
|
||||
// Whether to compile this shader using accurate, safe, non-IEEE multiplication (slow) or faster but less accurate mul
|
||||
bool useSafeMUL = false;
|
||||
|
||||
Xbyak::Label log2Func, exp2Func;
|
||||
Xbyak::Label emitLog2Func();
|
||||
Xbyak::Label emitExp2Func();
|
||||
Xbyak::util::Cpu cpuCaps;
|
||||
|
||||
// Emit a PICA200-compliant multiplication that handles "0 * inf = 0"
|
||||
void emitSafeMUL(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
|
||||
|
||||
// Compile all instructions from [current recompiler PC, end)
|
||||
void compileUntil(const PICAShader& shaderUnit, u32 endPC);
|
||||
// Compile instruction "instr"
|
||||
|
@ -125,7 +132,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
ShaderEmitter() : Xbyak::CodeGenerator(allocSize) {
|
||||
ShaderEmitter(bool useSafeMUL) : Xbyak::CodeGenerator(allocSize), useSafeMUL(useSafeMUL) {
|
||||
cpuCaps = Xbyak::util::Cpu();
|
||||
|
||||
haveSSE4_1 = cpuCaps.has(Xbyak::util::Cpu::tSSE41);
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
#include "PICA/draw_acceleration.hpp"
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
|
@ -13,6 +14,12 @@
|
|||
#include "memory.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
enum class ShaderExecMode {
|
||||
Interpreter, // Interpret shaders on the CPU
|
||||
JIT, // Recompile shaders to CPU machine code
|
||||
Hardware, // Recompiler shaders to host shaders and run them on the GPU
|
||||
};
|
||||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
static constexpr u32 extRegNum = 0x1000;
|
||||
|
@ -45,7 +52,7 @@ class GPU {
|
|||
uint immediateModeVertIndex;
|
||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||
|
||||
template <bool indexed, bool useShaderJIT>
|
||||
template <bool indexed, ShaderExecMode mode>
|
||||
void drawArrays();
|
||||
|
||||
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
||||
|
@ -81,6 +88,7 @@ class GPU {
|
|||
std::unique_ptr<Renderer> renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
||||
void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed);
|
||||
public:
|
||||
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
|
||||
// Encoded in PICA native format
|
||||
|
@ -92,6 +100,9 @@ class GPU {
|
|||
// Set to false by the renderer when the lighting_lut is uploaded ot the GPU
|
||||
bool lightingLUTDirty = false;
|
||||
|
||||
bool fogLUTDirty = false;
|
||||
std::array<uint32_t, 128> fogLUT;
|
||||
|
||||
GPU(Memory& mem, EmulatorConfig& config);
|
||||
void display() { renderer->display(); }
|
||||
void screenshot(const std::string& name) { renderer->screenshot(name); }
|
||||
|
@ -164,7 +175,8 @@ class GPU {
|
|||
u32 index = paddr - PhysicalAddrs::VRAM;
|
||||
return (T*)&vram[index];
|
||||
} else [[unlikely]] {
|
||||
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
Helpers::warn("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
257
include/PICA/pica_frag_config.hpp
Normal file
|
@ -0,0 +1,257 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "bitfield.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct OutputConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
// Merge the enable + compare function into 1 field to avoid duplicate shaders
|
||||
// enable == off means a CompareFunction of Always
|
||||
BitField<0, 3, CompareFunction> alphaTestFunction;
|
||||
BitField<3, 1, u32> depthMapEnable;
|
||||
BitField<4, 4, LogicOpMode> logicOpMode;
|
||||
};
|
||||
};
|
||||
|
||||
struct TextureConfig {
|
||||
u32 texUnitConfig;
|
||||
u32 texEnvUpdateBuffer;
|
||||
|
||||
// There's 6 TEV stages, and each one is configured via 4 word-sized registers
|
||||
// (+ the constant color register, which we don't include here, otherwise we'd generate too many shaders)
|
||||
std::array<u32, 4 * 6> tevConfigs;
|
||||
};
|
||||
|
||||
struct FogConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
|
||||
BitField<0, 3, FogMode> mode;
|
||||
BitField<3, 1, u32> flipDepth;
|
||||
};
|
||||
};
|
||||
|
||||
struct Light {
|
||||
union {
|
||||
u16 raw;
|
||||
BitField<0, 3, u16> num;
|
||||
BitField<3, 1, u16> directional;
|
||||
BitField<4, 1, u16> twoSidedDiffuse;
|
||||
BitField<5, 1, u16> distanceAttenuationEnable;
|
||||
BitField<6, 1, u16> spotAttenuationEnable;
|
||||
BitField<7, 1, u16> geometricFactor0;
|
||||
BitField<8, 1, u16> geometricFactor1;
|
||||
BitField<9, 1, u16> shadowEnable;
|
||||
};
|
||||
};
|
||||
|
||||
struct LightingLUTConfig {
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 1, u32> absInput;
|
||||
BitField<2, 3, u32> type;
|
||||
BitField<5, 3, u32> scale;
|
||||
};
|
||||
};
|
||||
|
||||
struct LightingConfig {
|
||||
union {
|
||||
u32 raw{};
|
||||
BitField<0, 1, u32> enable;
|
||||
BitField<1, 4, u32> lightNum;
|
||||
BitField<5, 2, u32> bumpMode;
|
||||
BitField<7, 2, u32> bumpSelector;
|
||||
BitField<9, 1, u32> bumpRenorm;
|
||||
BitField<10, 1, u32> clampHighlights;
|
||||
BitField<11, 4, u32> config;
|
||||
BitField<15, 1, u32> enablePrimaryAlpha;
|
||||
BitField<16, 1, u32> enableSecondaryAlpha;
|
||||
BitField<17, 1, u32> enableShadow;
|
||||
BitField<18, 1, u32> shadowPrimary;
|
||||
BitField<19, 1, u32> shadowSecondary;
|
||||
BitField<20, 1, u32> shadowInvert;
|
||||
BitField<21, 1, u32> shadowAlpha;
|
||||
BitField<22, 2, u32> shadowSelector;
|
||||
};
|
||||
|
||||
std::array<LightingLUTConfig, 7> luts{};
|
||||
|
||||
std::array<Light, 8> lights{};
|
||||
|
||||
LightingConfig(const std::array<u32, 0x300>& regs) {
|
||||
// Ignore lighting registers if it's disabled
|
||||
if ((regs[InternalRegs::LightingEnable] & 1) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const u32 config0 = regs[InternalRegs::LightConfig0];
|
||||
const u32 config1 = regs[InternalRegs::LightConfig1];
|
||||
const u32 totalLightCount = Helpers::getBits<0, 3>(regs[InternalRegs::LightNumber]) + 1;
|
||||
|
||||
enable = 1;
|
||||
lightNum = totalLightCount;
|
||||
|
||||
enableShadow = Helpers::getBit<0>(config0);
|
||||
if (enableShadow) [[unlikely]] {
|
||||
shadowPrimary = Helpers::getBit<16>(config0);
|
||||
shadowSecondary = Helpers::getBit<17>(config0);
|
||||
shadowInvert = Helpers::getBit<18>(config0);
|
||||
shadowAlpha = Helpers::getBit<19>(config0);
|
||||
shadowSelector = Helpers::getBits<24, 2>(config0);
|
||||
}
|
||||
|
||||
enablePrimaryAlpha = Helpers::getBit<2>(config0);
|
||||
enableSecondaryAlpha = Helpers::getBit<3>(config0);
|
||||
config = Helpers::getBits<4, 4>(config0);
|
||||
|
||||
bumpSelector = Helpers::getBits<22, 2>(config0);
|
||||
clampHighlights = Helpers::getBit<27>(config0);
|
||||
bumpMode = Helpers::getBits<28, 2>(config0);
|
||||
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
|
||||
|
||||
for (int i = 0; i < totalLightCount; i++) {
|
||||
auto& light = lights[i];
|
||||
light.num = (regs[InternalRegs::LightPermutation] >> (i * 4)) & 0x7;
|
||||
|
||||
const u32 lightConfig = regs[InternalRegs::Light0Config + 0x10 * light.num];
|
||||
light.directional = Helpers::getBit<0>(lightConfig);
|
||||
light.twoSidedDiffuse = Helpers::getBit<1>(lightConfig);
|
||||
light.geometricFactor0 = Helpers::getBit<2>(lightConfig);
|
||||
light.geometricFactor1 = Helpers::getBit<3>(lightConfig);
|
||||
|
||||
light.shadowEnable = ((config1 >> light.num) & 1) ^ 1; // This also does 0 = enabled
|
||||
light.spotAttenuationEnable = ((config1 >> (8 + light.num)) & 1) ^ 1; // Same here
|
||||
light.distanceAttenuationEnable = ((config1 >> (24 + light.num)) & 1) ^ 1; // Of course same here
|
||||
}
|
||||
|
||||
LightingLUTConfig& d0 = luts[Lights::LUT_D0];
|
||||
LightingLUTConfig& d1 = luts[Lights::LUT_D1];
|
||||
LightingLUTConfig& sp = luts[spotlightLutIndex];
|
||||
LightingLUTConfig& fr = luts[Lights::LUT_FR];
|
||||
LightingLUTConfig& rb = luts[Lights::LUT_RB];
|
||||
LightingLUTConfig& rg = luts[Lights::LUT_RG];
|
||||
LightingLUTConfig& rr = luts[Lights::LUT_RR];
|
||||
|
||||
d0.enable = Helpers::getBit<16>(config1) == 0;
|
||||
d1.enable = Helpers::getBit<17>(config1) == 0;
|
||||
fr.enable = Helpers::getBit<19>(config1) == 0;
|
||||
rb.enable = Helpers::getBit<20>(config1) == 0;
|
||||
rg.enable = Helpers::getBit<21>(config1) == 0;
|
||||
rr.enable = Helpers::getBit<22>(config1) == 0;
|
||||
sp.enable = 1;
|
||||
|
||||
const u32 lutAbs = regs[InternalRegs::LightLUTAbs];
|
||||
const u32 lutSelect = regs[InternalRegs::LightLUTSelect];
|
||||
const u32 lutScale = regs[InternalRegs::LightLUTScale];
|
||||
|
||||
if (d0.enable) {
|
||||
d0.absInput = Helpers::getBit<1>(lutAbs) == 0;
|
||||
d0.type = Helpers::getBits<0, 3>(lutSelect);
|
||||
d0.scale = Helpers::getBits<0, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (d1.enable) {
|
||||
d1.absInput = Helpers::getBit<5>(lutAbs) == 0;
|
||||
d1.type = Helpers::getBits<4, 3>(lutSelect);
|
||||
d1.scale = Helpers::getBits<4, 3>(lutScale);
|
||||
}
|
||||
|
||||
sp.absInput = Helpers::getBit<9>(lutAbs) == 0;
|
||||
sp.type = Helpers::getBits<8, 3>(lutSelect);
|
||||
sp.scale = Helpers::getBits<8, 3>(lutScale);
|
||||
|
||||
if (fr.enable) {
|
||||
fr.absInput = Helpers::getBit<13>(lutAbs) == 0;
|
||||
fr.type = Helpers::getBits<12, 3>(lutSelect);
|
||||
fr.scale = Helpers::getBits<12, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rb.enable) {
|
||||
rb.absInput = Helpers::getBit<17>(lutAbs) == 0;
|
||||
rb.type = Helpers::getBits<16, 3>(lutSelect);
|
||||
rb.scale = Helpers::getBits<16, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rg.enable) {
|
||||
rg.absInput = Helpers::getBit<21>(lutAbs) == 0;
|
||||
rg.type = Helpers::getBits<20, 3>(lutSelect);
|
||||
rg.scale = Helpers::getBits<20, 3>(lutScale);
|
||||
}
|
||||
|
||||
if (rr.enable) {
|
||||
rr.absInput = Helpers::getBit<25>(lutAbs) == 0;
|
||||
rr.type = Helpers::getBits<24, 3>(lutSelect);
|
||||
rr.scale = Helpers::getBits<24, 3>(lutScale);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Config used for identifying unique fragment pipeline configurations
|
||||
struct FragmentConfig {
|
||||
OutputConfig outConfig;
|
||||
TextureConfig texConfig;
|
||||
FogConfig fogConfig;
|
||||
LightingConfig lighting;
|
||||
|
||||
bool operator==(const FragmentConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
|
||||
}
|
||||
|
||||
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
|
||||
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
|
||||
|
||||
outConfig.alphaTestFunction =
|
||||
(alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
|
||||
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
|
||||
|
||||
// Shows if blending is enabled. If it is not enabled, then logic ops are enabled instead
|
||||
const bool blendingEnabled = (regs[InternalRegs::ColourOperation] & (1 << 8)) != 0;
|
||||
outConfig.logicOpMode = blendingEnabled ? LogicOpMode::Copy : LogicOpMode(Helpers::getBits<0, 4>(regs[InternalRegs::LogicOp]));
|
||||
|
||||
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
|
||||
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
|
||||
|
||||
// Set up TEV stages. Annoyingly we can't just memcpy as the TEV registers are arranged like
|
||||
// {Source, Operand, Combiner, Color, Scale} and we want to skip the color register since it's uploaded via UBO
|
||||
#define setupTevStage(stage) \
|
||||
std::memcpy(&texConfig.tevConfigs[stage * 4], ®s[InternalRegs::TexEnv##stage##Source], 3 * sizeof(u32)); \
|
||||
texConfig.tevConfigs[stage * 4 + 3] = regs[InternalRegs::TexEnv##stage##Source + 4];
|
||||
|
||||
setupTevStage(0);
|
||||
setupTevStage(1);
|
||||
setupTevStage(2);
|
||||
setupTevStage(3);
|
||||
setupTevStage(4);
|
||||
setupTevStage(5);
|
||||
#undef setupTevStage
|
||||
|
||||
fogConfig.mode = (FogMode)Helpers::getBits<0, 3>(regs[InternalRegs::TexEnvUpdateBuffer]);
|
||||
|
||||
if (fogConfig.mode == FogMode::Fog) {
|
||||
fogConfig.flipDepth = Helpers::getBit<16>(regs[InternalRegs::TexEnvUpdateBuffer]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(
|
||||
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
|
||||
std::has_unique_object_representations<FogConfig>() && std::has_unique_object_representations<Light>()
|
||||
);
|
||||
} // namespace PICA
|
||||
|
||||
// Override std::hash for our fragment config class
|
||||
template <>
|
||||
struct std::hash<PICA::FragmentConfig> {
|
||||
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||
};
|
47
include/PICA/pica_frag_uniforms.hpp
Normal file
|
@ -0,0 +1,47 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct LightUniform {
|
||||
using vec3 = std::array<float, 3>;
|
||||
|
||||
// std140 requires vec3s be aligned to 16 bytes
|
||||
alignas(16) vec3 specular0;
|
||||
alignas(16) vec3 specular1;
|
||||
alignas(16) vec3 diffuse;
|
||||
alignas(16) vec3 ambient;
|
||||
alignas(16) vec3 position;
|
||||
alignas(16) vec3 spotlightDirection;
|
||||
|
||||
float distanceAttenuationBias;
|
||||
float distanceAttenuationScale;
|
||||
};
|
||||
|
||||
struct FragmentUniforms {
|
||||
using vec3 = std::array<float, 3>;
|
||||
using vec4 = std::array<float, 4>;
|
||||
static constexpr usize tevStageCount = 6;
|
||||
|
||||
s32 alphaReference;
|
||||
float depthScale;
|
||||
float depthOffset;
|
||||
|
||||
alignas(16) vec4 constantColors[tevStageCount];
|
||||
alignas(16) vec4 tevBufferColor;
|
||||
alignas(16) vec4 clipCoords;
|
||||
|
||||
// Note: We upload these as a u32 and decode on GPU.
|
||||
// Particularly the fog colour since fog is really uncommon and it doesn't matter if we decode on GPU.
|
||||
u32 globalAmbientLight;
|
||||
u32 fogColor;
|
||||
// NOTE: THIS MUST BE LAST so that if lighting is disabled we can potentially omit uploading it
|
||||
LightUniform lightUniforms[8];
|
||||
};
|
||||
|
||||
// Assert that lightUniforms is the last member of the structure
|
||||
static_assert(offsetof(FragmentUniforms, lightUniforms) + 8 * sizeof(LightUniform) == sizeof(FragmentUniforms));
|
||||
} // namespace PICA
|
274
include/PICA/pica_simd.hpp
Normal file
|
@ -0,0 +1,274 @@
|
|||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#if defined(_M_AMD64) || defined(__x86_64__)
|
||||
#define PICA_SIMD_X64
|
||||
#include <immintrin.h>
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define PICA_SIMD_ARM64
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Optimized functions for analyzing PICA index buffers (Finding minimum and maximum index values inside them)
|
||||
namespace PICA::IndexBuffer {
|
||||
// Non-SIMD, portable algorithm
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyzePortable(u8* indexBuffer, u32 vertexCount) {
|
||||
u16 minimumIndex = std::numeric_limits<u16>::max();
|
||||
u16 maximumIndex = 0;
|
||||
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
if constexpr (useShortIndices) {
|
||||
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
|
||||
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u16 index = indexBuffer16[i];
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
} else {
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u16 index = u16(indexBuffer[i]);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
|
||||
#ifdef PICA_SIMD_ARM64
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyzeNEON(u8* indexBuffer, u32 vertexCount) {
|
||||
// We process 16 bytes per iteration, which is 8 vertices if we're using u16 indices or 16 vertices if we're using u8 indices
|
||||
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
|
||||
|
||||
if (vertexCount < vertsPerLoop) {
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
}
|
||||
|
||||
u16 minimumIndex, maximumIndex;
|
||||
|
||||
if constexpr (useShortIndices) {
|
||||
// 16-bit indices
|
||||
uint16x8_t minima = vdupq_n_u16(0xffff);
|
||||
uint16x8_t maxima = vdupq_n_u16(0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const uint16x8_t data = vld1q_u16(reinterpret_cast<u16*>(indexBuffer));
|
||||
minima = vminq_u16(data, minima);
|
||||
maxima = vmaxq_u16(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
// Do horizontal min/max operations to get the actual minimum and maximum from all the vertices we processed with SIMD
|
||||
// We want to gather the actual minimum and maximum in the line bottom lane of the minima/maxima vectors
|
||||
// uint16x4_t foldedMinima1 = vmin_u16(vget_high_u16(minima), vget_low_u16(minima));
|
||||
// uint16x4_t foldedMaxima1 = vmax_u16(vget_high_u16(maxima), vget_low_u16(maxima));
|
||||
|
||||
uint16x8_t foldedMinima1 = vpminq_u16(minima, minima);
|
||||
uint16x8_t foldedMinima2 = vpminq_u16(foldedMinima1, foldedMinima1);
|
||||
uint16x8_t foldedMinima3 = vpminq_u16(foldedMinima2, foldedMinima2);
|
||||
|
||||
uint16x8_t foldedMaxima1 = vpmaxq_u16(maxima, maxima);
|
||||
uint16x8_t foldedMaxima2 = vpmaxq_u16(foldedMaxima1, foldedMaxima1);
|
||||
uint16x8_t foldedMaxima3 = vpmaxq_u16(foldedMaxima2, foldedMaxima2);
|
||||
|
||||
minimumIndex = vgetq_lane_u16(foldedMinima3, 0);
|
||||
maximumIndex = vgetq_lane_u16(foldedMaxima3, 0);
|
||||
} else {
|
||||
// 8-bit indices
|
||||
uint8x16_t minima = vdupq_n_u8(0xff);
|
||||
uint8x16_t maxima = vdupq_n_u8(0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
uint8x16_t data = vld1q_u8(indexBuffer);
|
||||
minima = vminq_u8(data, minima);
|
||||
maxima = vmaxq_u8(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
// Do a similar horizontal min/max as in the u16 case, except now we're working uint8x16 instead of uint16x4 so we need 4 folds
|
||||
uint8x16_t foldedMinima1 = vpminq_u8(minima, minima);
|
||||
uint8x16_t foldedMinima2 = vpminq_u8(foldedMinima1, foldedMinima1);
|
||||
uint8x16_t foldedMinima3 = vpminq_u8(foldedMinima2, foldedMinima2);
|
||||
uint8x16_t foldedMinima4 = vpminq_u8(foldedMinima3, foldedMinima3);
|
||||
|
||||
uint8x16_t foldedMaxima1 = vpmaxq_u8(maxima, maxima);
|
||||
uint8x16_t foldedMaxima2 = vpmaxq_u8(foldedMaxima1, foldedMaxima1);
|
||||
uint8x16_t foldedMaxima3 = vpmaxq_u8(foldedMaxima2, foldedMaxima2);
|
||||
uint8x16_t foldedMaxima4 = vpmaxq_u8(foldedMaxima3, foldedMaxima3);
|
||||
|
||||
minimumIndex = u16(vgetq_lane_u8(foldedMinima4, 0));
|
||||
maximumIndex = u16(vgetq_lane_u8(foldedMaxima4, 0));
|
||||
}
|
||||
|
||||
// If any indices could not be processed cause the buffer size is not 16-byte aligned, process them the naive way
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
while (vertexCount > 0) {
|
||||
if constexpr (useShortIndices) {
|
||||
u16 index = *reinterpret_cast<u16*>(indexBuffer);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
indexBuffer += 2;
|
||||
} else {
|
||||
u16 index = u16(*indexBuffer++);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
|
||||
vertexCount -= 1;
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyzeSSE4_1(u8* indexBuffer, u32 vertexCount) {
|
||||
// We process 16 bytes per iteration, which is 8 vertices if we're using u16
|
||||
// indices or 16 vertices if we're using u8 indices
|
||||
constexpr u32 vertsPerLoop = (useShortIndices) ? 8 : 16;
|
||||
|
||||
if (vertexCount < vertsPerLoop) {
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
}
|
||||
|
||||
u16 minimumIndex, maximumIndex;
|
||||
|
||||
if constexpr (useShortIndices) {
|
||||
// Calculate the horizontal minimum/maximum value across an SSE vector of 16-bit unsigned integers.
|
||||
// Based on https://stackoverflow.com/a/22259607
|
||||
auto horizontalMin16 = [](__m128i vector) -> u16 { return u16(_mm_cvtsi128_si32(_mm_minpos_epu16(vector))); };
|
||||
|
||||
auto horizontalMax16 = [](__m128i vector) -> u16 {
|
||||
// We have an instruction to compute horizontal minimum but not maximum, so we use it.
|
||||
// To use it, we have to subtract each value from 0xFFFF (which we do with an xor), then execute a horizontal minimum
|
||||
__m128i flipped = _mm_xor_si128(vector, _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu));
|
||||
u16 min = u16(_mm_cvtsi128_si32(_mm_minpos_epu16(flipped)));
|
||||
return u16(min ^ 0xffff);
|
||||
};
|
||||
|
||||
// 16-bit indices
|
||||
// Initialize the minima vector to all FFs (So 0xFFFF for each 16-bit lane)
|
||||
// And the maxima vector to all 0s (0 for each 16-bit lane)
|
||||
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
|
||||
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
|
||||
minima = _mm_min_epu16(data, minima);
|
||||
maxima = _mm_max_epu16(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
minimumIndex = u16(horizontalMin16(minima));
|
||||
maximumIndex = u16(horizontalMax16(maxima));
|
||||
} else {
|
||||
// Calculate the horizontal minimum/maximum value across an SSE vector of 8-bit unsigned integers.
|
||||
// Based on https://stackoverflow.com/a/22259607
|
||||
auto horizontalMin8 = [](__m128i vector) -> u8 {
|
||||
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
vector = _mm_min_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_min_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_min_epu8(vector, _mm_srli_epi16(vector, 8));
|
||||
return u8(_mm_cvtsi128_si32(vector));
|
||||
};
|
||||
|
||||
auto horizontalMax8 = [](__m128i vector) -> u8 {
|
||||
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
vector = _mm_max_epu8(vector, _mm_shuffle_epi32(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_max_epu8(vector, _mm_shufflelo_epi16(vector, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
vector = _mm_max_epu8(vector, _mm_srli_epi16(vector, 8));
|
||||
return u8(_mm_cvtsi128_si32(vector));
|
||||
};
|
||||
|
||||
// 8-bit indices
|
||||
// Initialize the minima vector to all FFs (So 0xFF for each 8-bit lane)
|
||||
// And the maxima vector to all 0s (0 for each 8-bit lane)
|
||||
__m128i minima = _mm_set_epi32(0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu);
|
||||
__m128i maxima = _mm_set_epi32(0, 0, 0, 0);
|
||||
|
||||
while (vertexCount >= vertsPerLoop) {
|
||||
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(indexBuffer));
|
||||
minima = _mm_min_epu8(data, minima);
|
||||
maxima = _mm_max_epu8(data, maxima);
|
||||
|
||||
indexBuffer += 16;
|
||||
vertexCount -= vertsPerLoop;
|
||||
}
|
||||
|
||||
minimumIndex = u16(horizontalMin8(minima));
|
||||
maximumIndex = u16(horizontalMax8(maxima));
|
||||
}
|
||||
|
||||
// If any indices could not be processed cause the buffer size
|
||||
// is not 16-byte aligned, process them the naive way
|
||||
// Calculate the minimum and maximum indices used in the index
|
||||
// buffer, so we'll only upload them
|
||||
while (vertexCount > 0) {
|
||||
if constexpr (useShortIndices) {
|
||||
u16 index = *reinterpret_cast<u16*>(indexBuffer);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
indexBuffer += 2;
|
||||
} else {
|
||||
u16 index = u16(*indexBuffer++);
|
||||
minimumIndex = std::min(minimumIndex, index);
|
||||
maximumIndex = std::max(maximumIndex, index);
|
||||
}
|
||||
|
||||
vertexCount -= 1;
|
||||
}
|
||||
|
||||
return {minimumIndex, maximumIndex};
|
||||
}
|
||||
#endif
|
||||
|
||||
// Analyzes a PICA index buffer to get the minimum and maximum indices in the
|
||||
// buffer, and returns them in a pair in the form [min, max]. Takes a template
|
||||
// parameter to decide whether the indices in the buffer are u8 or u16
|
||||
template <bool useShortIndices>
|
||||
std::pair<u16, u16> analyze(u8* indexBuffer, u32 vertexCount) {
|
||||
#if defined(PICA_SIMD_ARM64)
|
||||
return analyzeNEON<useShortIndices>(indexBuffer, vertexCount);
|
||||
#elif defined(PICA_SIMD_X64) && (defined(__SSE4_1__) || defined(__AVX__))
|
||||
// Annoyingly, MSVC refuses to define __SSE4_1__ even when we're building with AVX
|
||||
return analyzeSSE4_1<useShortIndices>(indexBuffer, vertexCount);
|
||||
#else
|
||||
return analyzePortable<useShortIndices>(indexBuffer, vertexCount);
|
||||
#endif
|
||||
}
|
||||
|
||||
// In some really unfortunate scenarios (eg Android Studio emulator), we don't have access to glDrawRangeElementsBaseVertex
|
||||
// So we need to subtract the base vertex index from every index in the index buffer ourselves
|
||||
// This is not really common, so we do it without SIMD for the moment, just to be able to run on Android Studio
|
||||
template <bool useShortIndices>
|
||||
void subtractBaseIndex(u8* indexBuffer, u32 indexCount, u16 baseIndex) {
|
||||
// Calculate the minimum and maximum indices used in the index buffer, so we'll only upload them
|
||||
if constexpr (useShortIndices) {
|
||||
u16* indexBuffer16 = reinterpret_cast<u16*>(indexBuffer);
|
||||
|
||||
for (u32 i = 0; i < indexCount; i++) {
|
||||
indexBuffer16[i] -= baseIndex;
|
||||
}
|
||||
} else {
|
||||
u8 baseIndex8 = u8(baseIndex);
|
||||
|
||||
for (u32 i = 0; i < indexCount; i++) {
|
||||
indexBuffer[i] -= baseIndex8;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace PICA::IndexBuffer
|
57
include/PICA/pica_vert_config.hpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "bitfield.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
// Configuration struct used
|
||||
struct VertConfig {
|
||||
PICAHash::HashType shaderHash;
|
||||
PICAHash::HashType opdescHash;
|
||||
u32 entrypoint;
|
||||
|
||||
// PICA registers for configuring shader output->fragment semantic mapping
|
||||
std::array<u32, 7> outmaps{};
|
||||
u16 outputMask;
|
||||
u8 outputCount;
|
||||
bool usingUbershader;
|
||||
|
||||
// Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup
|
||||
// As the padding will get hashed and memcmp'd...
|
||||
u32 pad{};
|
||||
|
||||
bool operator==(const VertConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
|
||||
}
|
||||
|
||||
VertConfig(PICAShader& shader, const std::array<u32, 0x300>& regs, bool usingUbershader) : usingUbershader(usingUbershader) {
|
||||
shaderHash = shader.getCodeHash();
|
||||
opdescHash = shader.getOpdescHash();
|
||||
entrypoint = shader.entrypoint;
|
||||
|
||||
outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||
outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask];
|
||||
for (int i = 0; i < outputCount; i++) {
|
||||
// Mask out unused bits
|
||||
outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
||||
static_assert(sizeof(PICA::VertConfig) == 56);
|
||||
|
||||
// Override std::hash for our vertex config class
|
||||
template <>
|
||||
struct std::hash<PICA::VertConfig> {
|
||||
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||
};
|
|
@ -51,6 +51,18 @@ namespace PICA {
|
|||
#undef defineTexEnv
|
||||
// clang-format on
|
||||
|
||||
// Fog registers
|
||||
FogColor = 0xE1,
|
||||
FogLUTIndex = 0xE6,
|
||||
FogLUTData0 = 0xE8,
|
||||
FogLUTData1 = 0xE9,
|
||||
FogLUTData2 = 0xEA,
|
||||
FogLUTData3 = 0xEB,
|
||||
FogLUTData4 = 0xEC,
|
||||
FogLUTData5 = 0xED,
|
||||
FogLUTData6 = 0xEE,
|
||||
FogLUTData7 = 0xEF,
|
||||
|
||||
// Framebuffer registers
|
||||
ColourOperation = 0x100,
|
||||
BlendFunc = 0x101,
|
||||
|
@ -67,7 +79,29 @@ namespace PICA {
|
|||
ColourBufferLoc = 0x11D,
|
||||
FramebufferSize = 0x11E,
|
||||
|
||||
//LightingRegs
|
||||
// Lighting registers
|
||||
LightingEnable = 0x8F,
|
||||
Light0Specular0 = 0x140,
|
||||
Light0Specular1 = 0x141,
|
||||
Light0Diffuse = 0x142,
|
||||
Light0Ambient = 0x143,
|
||||
Light0XY = 0x144,
|
||||
Light0Z = 0x145,
|
||||
Light0SpotlightXY = 0x146,
|
||||
Light0SpotlightZ = 0x147,
|
||||
Light0Config = 0x149,
|
||||
Light0AttenuationBias = 0x14A,
|
||||
Light0AttenuationScale = 0x14B,
|
||||
|
||||
LightGlobalAmbient = 0x1C0,
|
||||
LightNumber = 0x1C2,
|
||||
LightConfig0 = 0x1C3,
|
||||
LightConfig1 = 0x1C4,
|
||||
LightPermutation = 0x1D9,
|
||||
LightLUTAbs = 0x1D0,
|
||||
LightLUTSelect = 0x1D1,
|
||||
LightLUTScale = 0x1D2,
|
||||
|
||||
LightingLUTIndex = 0x01C5,
|
||||
LightingLUTData0 = 0x01C8,
|
||||
LightingLUTData1 = 0x01C9,
|
||||
|
@ -231,7 +265,8 @@ namespace PICA {
|
|||
enum : u32 {
|
||||
LUT_D0 = 0,
|
||||
LUT_D1,
|
||||
LUT_FR,
|
||||
// LUT 2 is not used, the emulator internally uses it for referring to the current source's spotlight in shaders
|
||||
LUT_FR = 0x3,
|
||||
LUT_RB,
|
||||
LUT_RG,
|
||||
LUT_RR,
|
||||
|
@ -255,6 +290,11 @@ namespace PICA {
|
|||
};
|
||||
}
|
||||
|
||||
// There's actually 8 different LUTs (SP0-SP7), one for each light with different indices (8-15)
|
||||
// We use an unused LUT value for "this light source's spotlight" instead and figure out which light source to use in compileLutLookup
|
||||
// This is particularly intuitive in several places, such as checking if a LUT is enabled
|
||||
static constexpr int spotlightLutIndex = 2;
|
||||
|
||||
enum class TextureFmt : u32 {
|
||||
RGBA8 = 0x0,
|
||||
RGB8 = 0x1,
|
||||
|
@ -345,4 +385,156 @@ namespace PICA {
|
|||
GeometryPrimitive = 3,
|
||||
};
|
||||
|
||||
enum class CompareFunction : u32 {
|
||||
Never = 0,
|
||||
Always = 1,
|
||||
Equal = 2,
|
||||
NotEqual = 3,
|
||||
Less = 4,
|
||||
LessOrEqual = 5,
|
||||
Greater = 6,
|
||||
GreaterOrEqual = 7,
|
||||
};
|
||||
|
||||
enum class LogicOpMode : u32 {
|
||||
Clear = 0,
|
||||
And = 1,
|
||||
ReverseAnd = 2,
|
||||
Copy = 3,
|
||||
Set = 4,
|
||||
InvertedCopy = 5,
|
||||
Nop = 6,
|
||||
Invert = 7,
|
||||
Nand = 8,
|
||||
Or = 9,
|
||||
Nor = 10,
|
||||
Xor = 11,
|
||||
Equiv = 12,
|
||||
InvertedAnd = 13,
|
||||
ReverseOr = 14,
|
||||
InvertedOr = 15,
|
||||
};
|
||||
|
||||
enum class FogMode : u32 {
|
||||
Disabled = 0,
|
||||
Fog = 5,
|
||||
Gas = 7,
|
||||
};
|
||||
|
||||
struct TexEnvConfig {
|
||||
enum class Source : u8 {
|
||||
PrimaryColor = 0x0,
|
||||
PrimaryFragmentColor = 0x1,
|
||||
SecondaryFragmentColor = 0x2,
|
||||
Texture0 = 0x3,
|
||||
Texture1 = 0x4,
|
||||
Texture2 = 0x5,
|
||||
Texture3 = 0x6,
|
||||
// TODO: Inbetween values are unknown
|
||||
PreviousBuffer = 0xD,
|
||||
Constant = 0xE,
|
||||
Previous = 0xF,
|
||||
};
|
||||
|
||||
enum class ColorOperand : u8 {
|
||||
SourceColor = 0x0,
|
||||
OneMinusSourceColor = 0x1,
|
||||
SourceAlpha = 0x2,
|
||||
OneMinusSourceAlpha = 0x3,
|
||||
SourceRed = 0x4,
|
||||
OneMinusSourceRed = 0x5,
|
||||
// TODO: Inbetween values are unknown
|
||||
SourceGreen = 0x8,
|
||||
OneMinusSourceGreen = 0x9,
|
||||
// Inbetween values are unknown
|
||||
SourceBlue = 0xC,
|
||||
OneMinusSourceBlue = 0xD,
|
||||
};
|
||||
|
||||
enum class AlphaOperand : u8 {
|
||||
SourceAlpha = 0x0,
|
||||
OneMinusSourceAlpha = 0x1,
|
||||
SourceRed = 0x2,
|
||||
OneMinusSourceRed = 0x3,
|
||||
SourceGreen = 0x4,
|
||||
OneMinusSourceGreen = 0x5,
|
||||
SourceBlue = 0x6,
|
||||
OneMinusSourceBlue = 0x7,
|
||||
};
|
||||
|
||||
enum class Operation : u8 {
|
||||
Replace = 0,
|
||||
Modulate = 1,
|
||||
Add = 2,
|
||||
AddSigned = 3,
|
||||
Lerp = 4,
|
||||
Subtract = 5,
|
||||
Dot3RGB = 6,
|
||||
Dot3RGBA = 7,
|
||||
MultiplyAdd = 8,
|
||||
AddMultiply = 9,
|
||||
};
|
||||
|
||||
// RGB sources
|
||||
Source colorSource1, colorSource2, colorSource3;
|
||||
// Alpha sources
|
||||
Source alphaSource1, alphaSource2, alphaSource3;
|
||||
|
||||
// RGB operands
|
||||
ColorOperand colorOperand1, colorOperand2, colorOperand3;
|
||||
// Alpha operands
|
||||
AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
|
||||
|
||||
// Texture environment operations for this stage
|
||||
Operation colorOp, alphaOp;
|
||||
|
||||
u32 constColor;
|
||||
|
||||
private:
|
||||
// These are the only private members since their value doesn't actually reflect the scale
|
||||
// So we make them public so we'll always use the appropriate member functions instead
|
||||
u8 colorScale;
|
||||
u8 alphaScale;
|
||||
|
||||
public:
|
||||
// Create texture environment object from TEV registers
|
||||
TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
|
||||
colorSource1 = Helpers::getBits<0, 4, Source>(source);
|
||||
colorSource2 = Helpers::getBits<4, 4, Source>(source);
|
||||
colorSource3 = Helpers::getBits<8, 4, Source>(source);
|
||||
|
||||
alphaSource1 = Helpers::getBits<16, 4, Source>(source);
|
||||
alphaSource2 = Helpers::getBits<20, 4, Source>(source);
|
||||
alphaSource3 = Helpers::getBits<24, 4, Source>(source);
|
||||
|
||||
colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
|
||||
colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
|
||||
colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
|
||||
|
||||
alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
|
||||
alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
|
||||
alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
|
||||
|
||||
colorOp = Helpers::getBits<0, 4, Operation>(combiner);
|
||||
alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
|
||||
|
||||
colorScale = Helpers::getBits<0, 2>(scale);
|
||||
alphaScale = Helpers::getBits<16, 2>(scale);
|
||||
}
|
||||
|
||||
u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; }
|
||||
u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }
|
||||
|
||||
bool isPassthroughStage() {
|
||||
// clang-format off
|
||||
// Thank you to the Citra dev that wrote this out
|
||||
return (
|
||||
colorOp == Operation::Replace && alphaOp == Operation::Replace &&
|
||||
colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
|
||||
colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
|
||||
getColorScale() == 1 && getAlphaScale() == 1
|
||||
);
|
||||
// clang-format on
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
|
@ -21,7 +23,7 @@ namespace ShaderOpcodes {
|
|||
DST = 0x04,
|
||||
EX2 = 0x05,
|
||||
LG2 = 0x06,
|
||||
LIT = 0x07,
|
||||
LITP = 0x07,
|
||||
MUL = 0x08,
|
||||
SGE = 0x09,
|
||||
SLT = 0x0A,
|
||||
|
@ -56,6 +58,10 @@ namespace ShaderOpcodes {
|
|||
};
|
||||
}
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
class ShaderDecompiler;
|
||||
};
|
||||
|
||||
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
||||
class PICAShader {
|
||||
using f24 = Floats::f24;
|
||||
|
@ -90,14 +96,22 @@ class PICAShader {
|
|||
public:
|
||||
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 boolUniform;
|
||||
std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
|
||||
// We want these registers in this order & with this alignment for uploading them directly to a UBO
|
||||
// When emulating shaders on the GPU. Plus this alignment for float uniforms is necessary for doing SIMD in the shader->CPU recompilers.
|
||||
alignas(16) std::array<vec4f, 96> floatUniforms;
|
||||
alignas(16) std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
u32 boolUniform;
|
||||
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> outputs;
|
||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
protected:
|
||||
std::array<u32, 128> operandDescriptors;
|
||||
|
@ -116,20 +130,20 @@ class PICAShader {
|
|||
std::array<CallInfo, 4> callInfo;
|
||||
ShaderType type;
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||
|
||||
public:
|
||||
bool uniformsDirty = false;
|
||||
|
||||
protected:
|
||||
bool codeHashDirty = false;
|
||||
bool opdescHashDirty = false;
|
||||
|
||||
// Add these as friend classes for the JIT so it has access to all important state
|
||||
friend class ShaderJIT;
|
||||
friend class ShaderEmitter;
|
||||
friend class PICA::ShaderGen::ShaderDecompiler;
|
||||
|
||||
vec4f getSource(u32 source);
|
||||
vec4f& getDest(u32 dest);
|
||||
|
@ -151,6 +165,7 @@ class PICAShader {
|
|||
void jmpc(u32 instruction);
|
||||
void jmpu(u32 instruction);
|
||||
void lg2(u32 instruction);
|
||||
void litp(u32 instruction);
|
||||
void loop(u32 instruction);
|
||||
void mad(u32 instruction);
|
||||
void madi(u32 instruction);
|
||||
|
@ -220,13 +235,9 @@ class PICAShader {
|
|||
public:
|
||||
static constexpr size_t maxInstructionCount = 4096;
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
|
||||
PICAShader(ShaderType type) : type(type) {}
|
||||
|
||||
// Theese functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
||||
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
|
||||
|
||||
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
|
||||
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
|
||||
|
||||
|
@ -235,7 +246,7 @@ class PICAShader {
|
|||
Helpers::panic("o no, shader upload overflew");
|
||||
}
|
||||
|
||||
bufferedShader[bufferIndex++] = word;
|
||||
loadedShader[bufferIndex++] = word;
|
||||
bufferIndex &= 0xfff;
|
||||
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
|
@ -256,14 +267,16 @@ class PICAShader {
|
|||
|
||||
void uploadFloatUniform(u32 word) {
|
||||
floatUniformBuffer[floatUniformWordCount++] = word;
|
||||
if (floatUniformIndex >= 96) {
|
||||
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
||||
}
|
||||
|
||||
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
|
||||
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
||||
floatUniformWordCount = 0;
|
||||
|
||||
// Check if the program tries to upload to a non-existent uniform, and empty the queue without writing in that case
|
||||
if (floatUniformIndex >= 96) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
||||
|
||||
if (f32UniformTransfer) {
|
||||
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||
|
@ -275,6 +288,7 @@ class PICAShader {
|
|||
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
}
|
||||
uniformsDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -286,6 +300,12 @@ class PICAShader {
|
|||
u[1] = getBits<8, 8>(word);
|
||||
u[2] = getBits<16, 8>(word);
|
||||
u[3] = getBits<24, 8>(word);
|
||||
uniformsDirty = true;
|
||||
}
|
||||
|
||||
void uploadBoolUniform(u32 value) {
|
||||
boolUniform = value;
|
||||
uniformsDirty = true;
|
||||
}
|
||||
|
||||
void run();
|
||||
|
@ -293,4 +313,13 @@ class PICAShader {
|
|||
|
||||
Hash getCodeHash();
|
||||
Hash getOpdescHash();
|
||||
};
|
||||
|
||||
// Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU.
|
||||
static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); }
|
||||
void* getUniformPointer() { return static_cast<void*>(&floatUniforms); }
|
||||
};
|
||||
|
||||
static_assert(
|
||||
offsetof(PICAShader, intUniforms) == offsetof(PICAShader, floatUniforms) + 96 * sizeof(float) * 4 &&
|
||||
offsetof(PICAShader, boolUniform) == offsetof(PICAShader, intUniforms) + 4 * sizeof(u8) * 4
|
||||
);
|
131
include/PICA/shader_decompiler.hpp
Normal file
|
@ -0,0 +1,131 @@
|
|||
#pragma once
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
#include "PICA/shader_gen_types.hpp"
|
||||
|
||||
struct EmulatorConfig;
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
// Control flow analysis is partially based on
|
||||
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
|
||||
struct ControlFlow {
|
||||
// A continuous range of addresses
|
||||
struct AddressRange {
|
||||
u32 start, end;
|
||||
AddressRange(u32 start, u32 end) : start(start), end(end) {}
|
||||
|
||||
// Use lexicographic comparison for functions in order to sort them in a set
|
||||
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
|
||||
};
|
||||
|
||||
struct Function {
|
||||
using Labels = std::set<u32>;
|
||||
|
||||
enum class ExitMode {
|
||||
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
|
||||
AlwaysReturn, // All paths reach the return point.
|
||||
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
|
||||
AlwaysEnd, // All paths reach an END instruction.
|
||||
};
|
||||
|
||||
u32 start; // Starting PC of the function
|
||||
u32 end; // End PC of the function
|
||||
Labels outLabels{}; // Labels this function can "goto" (jump) to
|
||||
ExitMode exitMode = ExitMode::Unknown;
|
||||
|
||||
explicit Function(u32 start, u32 end) : start(start), end(end) {}
|
||||
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
|
||||
|
||||
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
|
||||
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
|
||||
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
|
||||
// from within functions deep in the callstack
|
||||
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
|
||||
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
|
||||
};
|
||||
|
||||
std::set<Function> functions{};
|
||||
std::map<AddressRange, Function::ExitMode> exitMap{};
|
||||
|
||||
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
|
||||
// On the CPU
|
||||
bool analysisFailed = false;
|
||||
|
||||
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
|
||||
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
|
||||
auto searchIterator = functions.find(Function(start, end));
|
||||
if (searchIterator != functions.end()) {
|
||||
return &(*searchIterator);
|
||||
}
|
||||
|
||||
// Add this function and analyze it if it doesn't already exist
|
||||
Function function(start, end);
|
||||
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
|
||||
|
||||
// This function could not be fully analyzed, report failure
|
||||
if (function.exitMode == Function::ExitMode::Unknown) {
|
||||
analysisFailed = true;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Add function to our function list
|
||||
auto [it, added] = functions.insert(std::move(function));
|
||||
return &(*it);
|
||||
}
|
||||
|
||||
void analyze(const PICAShader& shader, u32 entrypoint);
|
||||
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
|
||||
};
|
||||
|
||||
class ShaderDecompiler {
|
||||
using AddressRange = ControlFlow::AddressRange;
|
||||
using Function = ControlFlow::Function;
|
||||
|
||||
ControlFlow controlFlow{};
|
||||
|
||||
PICAShader& shader;
|
||||
EmulatorConfig& config;
|
||||
std::string decompiledShader;
|
||||
|
||||
u32 entrypoint;
|
||||
|
||||
API api;
|
||||
Language language;
|
||||
bool compilationError = false;
|
||||
|
||||
void compileInstruction(u32& pc, bool& finished);
|
||||
// Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction)
|
||||
std::pair<u32, bool> compileRange(const AddressRange& range);
|
||||
void callFunction(const Function& function);
|
||||
const Function* findFunction(const AddressRange& range);
|
||||
|
||||
void writeAttributes();
|
||||
|
||||
std::string getSource(u32 source, u32 index) const;
|
||||
std::string getDest(u32 dest) const;
|
||||
std::string getSwizzlePattern(u32 swizzle) const;
|
||||
std::string getDestSwizzle(u32 destinationMask) const;
|
||||
const char* getCondition(u32 cond, u32 refX, u32 refY);
|
||||
|
||||
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
|
||||
// Returns if the instruction uses the typical register encodings most instructions use
|
||||
// With some exceptions like MAD/MADI, and the control flow instructions which are completely different
|
||||
bool usesCommonEncoding(u32 instruction) const;
|
||||
|
||||
public:
|
||||
ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
|
||||
: shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {}
|
||||
|
||||
std::string decompile();
|
||||
};
|
||||
|
||||
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
|
||||
} // namespace PICA::ShaderGen
|
43
include/PICA/shader_gen.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once
|
||||
#include <string>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/pica_vert_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen_types.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
class FragmentGenerator {
|
||||
API api;
|
||||
Language language;
|
||||
|
||||
void compileTEV(std::string& shader, int stage, const PICA::FragmentConfig& config);
|
||||
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index, const PICA::FragmentConfig& config);
|
||||
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index, const PICA::FragmentConfig& config);
|
||||
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
|
||||
|
||||
void applyAlphaTest(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLights(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLUTLookup(std::string& shader, const PICA::FragmentConfig& config, u32 lightIndex, u32 lutID);
|
||||
bool isSamplerEnabled(u32 environmentID, u32 lutID);
|
||||
|
||||
void compileFog(std::string& shader, const PICA::FragmentConfig& config);
|
||||
void compileLogicOps(std::string& shader, const PICA::FragmentConfig& config);
|
||||
|
||||
public:
|
||||
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
||||
std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr);
|
||||
std::string getDefaultVertexShader();
|
||||
// For when PICA shader is acceleration is enabled. Turn the PICA shader source into a proper vertex shader
|
||||
std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader);
|
||||
|
||||
void setTarget(API api, Language language) {
|
||||
this->api = api;
|
||||
this->language = language;
|
||||
}
|
||||
};
|
||||
}; // namespace PICA::ShaderGen
|
9
include/PICA/shader_gen_types.hpp
Normal file
|
@ -0,0 +1,9 @@
|
|||
#pragma once
|
||||
|
||||
namespace PICA::ShaderGen {
|
||||
// Graphics API this shader is targetting
|
||||
enum class API { GL, GLES, Vulkan };
|
||||
|
||||
// Shading language to use (Only GLSL for the time being)
|
||||
enum class Language { GLSL };
|
||||
} // namespace PICA::ShaderGen
|
|
@ -2,10 +2,9 @@
|
|||
#include "PICA/shader.hpp"
|
||||
|
||||
class ShaderUnit {
|
||||
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
|
||||
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
|
||||
void reset();
|
||||
|
|
99
include/align.hpp
Normal file
|
@ -0,0 +1,99 @@
|
|||
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
||||
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
namespace Common {
|
||||
template <typename T>
|
||||
constexpr bool isAligned(T value, unsigned int alignment) {
|
||||
return (value % static_cast<T>(alignment)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignUp(T value, unsigned int alignment) {
|
||||
return (value + static_cast<T>(alignment - 1)) / static_cast<T>(alignment) * static_cast<T>(alignment);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignDown(T value, unsigned int alignment) {
|
||||
return value / static_cast<T>(alignment) * static_cast<T>(alignment);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isAlignedPow2(T value, unsigned int alignment) {
|
||||
return (value & static_cast<T>(alignment - 1)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignUpPow2(T value, unsigned int alignment) {
|
||||
return (value + static_cast<T>(alignment - 1)) & static_cast<T>(~static_cast<T>(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T alignDownPow2(T value, unsigned int alignment) {
|
||||
return value & static_cast<T>(~static_cast<T>(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr bool isPow2(T value) {
|
||||
return (value & (value - 1)) == 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T previousPow2(T value) {
|
||||
if (value == static_cast<T>(0)) return 0;
|
||||
|
||||
value |= (value >> 1);
|
||||
value |= (value >> 2);
|
||||
value |= (value >> 4);
|
||||
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
|
||||
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
|
||||
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
|
||||
return value - (value >> 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T nextPow2(T value) {
|
||||
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
|
||||
if (value == static_cast<T>(0)) return 0;
|
||||
|
||||
value--;
|
||||
value |= (value >> 1);
|
||||
value |= (value >> 2);
|
||||
value |= (value >> 4);
|
||||
if constexpr (sizeof(T) >= 16) value |= (value >> 8);
|
||||
if constexpr (sizeof(T) >= 32) value |= (value >> 16);
|
||||
if constexpr (sizeof(T) >= 64) value |= (value >> 32);
|
||||
value++;
|
||||
return value;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) {
|
||||
#ifdef _WIN32
|
||||
return _aligned_malloc(size, alignment);
|
||||
#else
|
||||
// Unaligned sizes are slow on macOS.
|
||||
#ifdef __APPLE__
|
||||
if (isPow2(alignment)) size = (size + alignment - 1) & ~(alignment - 1);
|
||||
#endif
|
||||
void* ret = nullptr;
|
||||
return (posix_memalign(&ret, alignment, size) == 0) ? ret : nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void alignedFree(void* ptr) {
|
||||
#ifdef _MSC_VER
|
||||
_aligned_free(ptr);
|
||||
#else
|
||||
free(ptr);
|
||||
#endif
|
||||
}
|
||||
} // namespace Common
|
82
include/audio/aac.hpp
Normal file
|
@ -0,0 +1,82 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <type_traits>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "swap.hpp"
|
||||
|
||||
namespace Audio::AAC {
|
||||
namespace ResultCode {
|
||||
enum : u32 {
|
||||
Success = 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Enum values and struct definitions based off Citra
|
||||
namespace Command {
|
||||
enum : u16 {
|
||||
Init = 0, // Initialize encoder/decoder
|
||||
EncodeDecode = 1, // Encode/Decode AAC
|
||||
Shutdown = 2, // Shutdown encoder/decoder
|
||||
LoadState = 3,
|
||||
SaveState = 4,
|
||||
};
|
||||
}
|
||||
|
||||
namespace SampleRate {
|
||||
enum : u32 {
|
||||
Rate48000 = 0,
|
||||
Rate44100 = 1,
|
||||
Rate32000 = 2,
|
||||
Rate24000 = 3,
|
||||
Rate22050 = 4,
|
||||
Rate16000 = 5,
|
||||
Rate12000 = 6,
|
||||
Rate11025 = 7,
|
||||
Rate8000 = 8,
|
||||
};
|
||||
}
|
||||
|
||||
namespace Mode {
|
||||
enum : u16 {
|
||||
None = 0,
|
||||
Decode = 1,
|
||||
Encode = 2,
|
||||
};
|
||||
}
|
||||
|
||||
struct DecodeResponse {
|
||||
u32_le sampleRate;
|
||||
u32_le channelCount;
|
||||
u32_le size;
|
||||
u32_le unknown1;
|
||||
u32_le unknown2;
|
||||
u32_le sampleCount;
|
||||
};
|
||||
|
||||
struct DecodeRequest {
|
||||
u32_le address; // Address of input AAC stream
|
||||
u32_le size; // Size of input AAC stream
|
||||
u32_le destAddrLeft; // Output address for left channel samples
|
||||
u32_le destAddrRight; // Output address for right channel samples
|
||||
u32_le unknown1;
|
||||
u32_le unknown2;
|
||||
};
|
||||
|
||||
struct Message {
|
||||
u16_le mode = Mode::None; // Encode or decode AAC?
|
||||
u16_le command = Command::Init;
|
||||
u32_le resultCode = ResultCode::Success;
|
||||
|
||||
// Info on the AAC request
|
||||
union {
|
||||
std::array<u8, 24> commandData{};
|
||||
|
||||
DecodeResponse decodeResponse;
|
||||
DecodeRequest decodeRequest;
|
||||
};
|
||||
};
|
||||
|
||||
static_assert(sizeof(Message) == 32);
|
||||
static_assert(std::is_trivially_copyable<Message>());
|
||||
} // namespace Audio::AAC
|
24
include/audio/aac_decoder.hpp
Normal file
|
@ -0,0 +1,24 @@
|
|||
#pragma once
|
||||
#include <functional>
|
||||
|
||||
#include "audio/aac.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
struct AAC_DECODER_INSTANCE;
|
||||
|
||||
namespace Audio::AAC {
|
||||
class Decoder {
|
||||
using DecoderHandle = AAC_DECODER_INSTANCE*;
|
||||
using PaddrCallback = std::function<u8*(u32)>;
|
||||
|
||||
DecoderHandle decoderHandle = nullptr;
|
||||
|
||||
bool isInitialized() { return decoderHandle != nullptr; }
|
||||
void initialize();
|
||||
|
||||
public:
|
||||
// Decode function. Takes in a reference to the AAC response & request, and a callback for paddr -> pointer conversions
|
||||
void decode(AAC::Message& response, const AAC::Message& request, PaddrCallback paddrCallback);
|
||||
~Decoder();
|
||||
};
|
||||
} // namespace Audio::AAC
|
|
@ -43,7 +43,7 @@ namespace Audio {
|
|||
virtual ~DSPCore() {}
|
||||
|
||||
virtual void reset() = 0;
|
||||
virtual void runAudioFrame() = 0;
|
||||
virtual void runAudioFrame(u64 eventTimestamp) = 0;
|
||||
virtual u8* getDspMemory() = 0;
|
||||
|
||||
virtual u16 recvData(u32 regId) = 0;
|
||||
|
|
|
@ -294,12 +294,12 @@ namespace Audio::HLE {
|
|||
|
||||
struct SourceStatus {
|
||||
struct Status {
|
||||
u8 isEnabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
|
||||
u8 enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.)
|
||||
u8 currentBufferIDDirty; ///< Non-zero when current_buffer_id changes
|
||||
u16_le syncCount; ///< Is set by the DSP to the value of SourceConfiguration::sync_count
|
||||
u32_dsp bufferPosition; ///< Number of samples into the current buffer
|
||||
u32_dsp samplePosition; ///< Number of samples into the current buffer
|
||||
u16_le currentBufferID; ///< Updated when a buffer finishes playing
|
||||
u16_le lastBufferID; ///< Updated when all buffers in the queue finish playing
|
||||
u16_le previousBufferID; ///< Updated when all buffers in the queue finish playing
|
||||
};
|
||||
|
||||
Status status[sourceCount];
|
||||
|
@ -324,8 +324,8 @@ namespace Audio::HLE {
|
|||
BitField<15, 1, u32> outputBufferCountDirty;
|
||||
BitField<16, 1, u32> masterVolumeDirty;
|
||||
|
||||
BitField<24, 1, u32> auxReturnVolume0Dirty;
|
||||
BitField<25, 1, u32> auxReturnVolume1Dirty;
|
||||
BitField<24, 1, u32> auxVolume0Dirty;
|
||||
BitField<25, 1, u32> auxVolume1Dirty;
|
||||
BitField<26, 1, u32> outputFormatDirty;
|
||||
BitField<27, 1, u32> clippingModeDirty;
|
||||
BitField<28, 1, u32> headphonesConnectedDirty;
|
||||
|
@ -337,7 +337,7 @@ namespace Audio::HLE {
|
|||
/// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for
|
||||
/// each at the final mixer.
|
||||
float_le masterVolume;
|
||||
std::array<float_le, 2> auxReturnVolume;
|
||||
std::array<float_le, 2> auxVolumes;
|
||||
|
||||
u16_le outputBufferCount;
|
||||
u16 pad1[2];
|
||||
|
@ -422,7 +422,7 @@ namespace Audio::HLE {
|
|||
|
||||
struct DspStatus {
|
||||
u16_le unknown;
|
||||
u16_le dropped_frames;
|
||||
u16_le droppedFrames;
|
||||
u16 pad0[0xE];
|
||||
};
|
||||
ASSERT_DSP_STRUCT(DspStatus, 32);
|
||||
|
|
|
@ -2,9 +2,12 @@
|
|||
#include <array>
|
||||
#include <cassert>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "audio/aac.hpp"
|
||||
#include "audio/aac_decoder.hpp"
|
||||
#include "audio/dsp_core.hpp"
|
||||
#include "audio/dsp_shared_mem.hpp"
|
||||
#include "memory.hpp"
|
||||
|
@ -41,15 +44,25 @@ namespace Audio {
|
|||
return this->bufferID > other.bufferID;
|
||||
}
|
||||
};
|
||||
|
||||
// Buffer of decoded PCM16 samples. TODO: Are there better alternatives to use over deque?
|
||||
using SampleBuffer = std::deque<std::array<s16, 2>>;
|
||||
|
||||
using BufferQueue = std::priority_queue<Buffer>;
|
||||
BufferQueue buffers;
|
||||
|
||||
SampleFormat sampleFormat = SampleFormat::ADPCM;
|
||||
SourceType sourceType = SourceType::Stereo;
|
||||
|
||||
std::array<float, 3> gain0, gain1, gain2;
|
||||
u32 samplePosition; // Sample number into the current audio buffer
|
||||
float rateMultiplier;
|
||||
u16 syncCount;
|
||||
bool enabled; // Is the source enabled?
|
||||
u16 currentBufferID;
|
||||
u16 previousBufferID;
|
||||
|
||||
bool enabled; // Is the source enabled?
|
||||
bool isBufferIDDirty = false; // Did we change buffers?
|
||||
|
||||
// ADPCM decoding info:
|
||||
// An array of fixed point S5.11 coefficients. These provide "weights" for the history samples
|
||||
|
@ -65,6 +78,10 @@ namespace Audio {
|
|||
int index = 0; // Index of the voice in [0, 23] for debugging
|
||||
|
||||
void reset();
|
||||
|
||||
// Push a buffer to the buffer queue
|
||||
void pushBuffer(const Buffer& buffer) { buffers.push(buffer); }
|
||||
|
||||
// Pop a buffer from the buffer queue and return it
|
||||
Buffer popBuffer() {
|
||||
assert(!buffers.empty());
|
||||
|
@ -78,8 +95,7 @@ namespace Audio {
|
|||
DSPSource() { reset(); }
|
||||
};
|
||||
|
||||
class HLE_DSP : public DSPCore {
|
||||
// The audio frame types are public in case we want to use them for unit tests
|
||||
class DSPMixer {
|
||||
public:
|
||||
template <typename T, usize channelCount = 1>
|
||||
using Sample = std::array<T, channelCount>;
|
||||
|
@ -96,6 +112,43 @@ namespace Audio {
|
|||
template <typename T>
|
||||
using QuadFrame = Frame<T, 4>;
|
||||
|
||||
private:
|
||||
using ChannelFormat = HLE::DspConfiguration::OutputFormat;
|
||||
// The audio from each DSP voice is converted to quadraphonic and then fed into 3 intermediate mixing stages
|
||||
// Two of these intermediate mixers (second and third) are used for effects, including custom effects done on the CPU
|
||||
static constexpr usize mixerStageCount = 3;
|
||||
|
||||
public:
|
||||
ChannelFormat channelFormat = ChannelFormat::Stereo;
|
||||
std::array<float, mixerStageCount> volumes;
|
||||
std::array<bool, 2> enableAuxStages;
|
||||
|
||||
void reset() {
|
||||
channelFormat = ChannelFormat::Stereo;
|
||||
|
||||
volumes.fill(0.0);
|
||||
enableAuxStages.fill(false);
|
||||
}
|
||||
};
|
||||
|
||||
class HLE_DSP : public DSPCore {
|
||||
// The audio frame types are public in case we want to use them for unit tests
|
||||
public:
|
||||
template <typename T, usize channelCount = 1>
|
||||
using Sample = DSPMixer::Sample<T, channelCount>;
|
||||
|
||||
template <typename T, usize channelCount>
|
||||
using Frame = DSPMixer::Frame<T, channelCount>;
|
||||
|
||||
template <typename T>
|
||||
using MonoFrame = DSPMixer::MonoFrame<T>;
|
||||
|
||||
template <typename T>
|
||||
using StereoFrame = DSPMixer::StereoFrame<T>;
|
||||
|
||||
template <typename T>
|
||||
using QuadFrame = DSPMixer::QuadFrame<T>;
|
||||
|
||||
using Source = Audio::DSPSource;
|
||||
using SampleBuffer = Source::SampleBuffer;
|
||||
|
||||
|
@ -114,8 +167,8 @@ namespace Audio {
|
|||
std::array<Source, Audio::HLE::sourceCount> sources; // DSP voices
|
||||
Audio::HLE::DspMemory dspRam;
|
||||
|
||||
SampleFormat sampleFormat = SampleFormat::ADPCM;
|
||||
SourceType sourceType = SourceType::Stereo;
|
||||
Audio::DSPMixer mixer;
|
||||
std::unique_ptr<Audio::AAC::Decoder> aacDecoder;
|
||||
|
||||
void resetAudioPipe();
|
||||
bool loaded = false; // Have we loaded a component?
|
||||
|
@ -132,7 +185,7 @@ namespace Audio {
|
|||
} else if (counter1 == 0xffff && counter0 != 0xfffe) {
|
||||
return 0;
|
||||
} else {
|
||||
return counter0 > counter1 ? 0 : 0;
|
||||
return (counter0 > counter1) ? 0 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -157,11 +210,20 @@ namespace Audio {
|
|||
}
|
||||
}
|
||||
|
||||
void handleAACRequest(const AAC::Message& request);
|
||||
void updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients);
|
||||
void updateMixerConfig(HLE::SharedMemory& sharedMem);
|
||||
void generateFrame(StereoFrame<s16>& frame);
|
||||
void generateFrame(DSPSource& source);
|
||||
void outputFrame();
|
||||
// Perform the final mix, mixing the quadraphonic samples from all voices into the output audio frame
|
||||
void performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion);
|
||||
|
||||
// Decode an entire buffer worth of audio
|
||||
void decodeBuffer(DSPSource& source);
|
||||
|
||||
SampleBuffer decodePCM8(const u8* data, usize sampleCount, Source& source);
|
||||
SampleBuffer decodePCM16(const u8* data, usize sampleCount, Source& source);
|
||||
SampleBuffer decodeADPCM(const u8* data, usize sampleCount, Source& source);
|
||||
|
||||
public:
|
||||
|
@ -169,7 +231,7 @@ namespace Audio {
|
|||
~HLE_DSP() override {}
|
||||
|
||||
void reset() override;
|
||||
void runAudioFrame() override;
|
||||
void runAudioFrame(u64 eventTimestamp) override;
|
||||
|
||||
u8* getDspMemory() override { return dspRam.rawMemory.data(); }
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ namespace Audio {
|
|||
~NullDSP() override {}
|
||||
|
||||
void reset() override;
|
||||
void runAudioFrame() override;
|
||||
void runAudioFrame(u64 eventTimestamp) override;
|
||||
|
||||
u8* getDspMemory() override { return dspRam.data(); }
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ namespace Audio {
|
|||
void reset() override;
|
||||
|
||||
// Run 1 slice of DSP instructions and schedule the next audio frame
|
||||
void runAudioFrame() override {
|
||||
void runAudioFrame(u64 eventTimestamp) override {
|
||||
runSlice();
|
||||
scheduler.addEvent(Scheduler::EventType::RunDSP, scheduler.currentTimestamp + Audio::lleSlice * 2);
|
||||
}
|
||||
|
|
|
@ -7,16 +7,33 @@
|
|||
// Remember to initialize every field here to its default value otherwise bad things will happen
|
||||
struct EmulatorConfig {
|
||||
// Only enable the shader JIT by default on platforms where it's completely tested
|
||||
#ifdef PANDA3DS_X64_HOST
|
||||
#if defined(PANDA3DS_X64_HOST) || defined(PANDA3DS_ARM64_HOST)
|
||||
static constexpr bool shaderJitDefault = true;
|
||||
#else
|
||||
static constexpr bool shaderJitDefault = false;
|
||||
#endif
|
||||
|
||||
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
|
||||
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
|
||||
#if defined(__ANDROID__) || defined(__APPLE__)
|
||||
static constexpr bool ubershaderDefault = false;
|
||||
#else
|
||||
static constexpr bool ubershaderDefault = true;
|
||||
#endif
|
||||
static constexpr bool accelerateShadersDefault = true;
|
||||
|
||||
bool shaderJitEnabled = shaderJitDefault;
|
||||
bool useUbershaders = ubershaderDefault;
|
||||
bool accelerateShaders = accelerateShadersDefault;
|
||||
bool accurateShaderMul = false;
|
||||
bool discordRpcEnabled = false;
|
||||
|
||||
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance
|
||||
bool forceShadergenForLights = true;
|
||||
int lightShadergenThreshold = 1;
|
||||
|
||||
RendererType rendererType = RendererType::OpenGL;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;
|
||||
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::HLE;
|
||||
|
||||
bool sdCardInserted = true;
|
||||
bool sdWriteProtected = false;
|
||||
|
@ -25,6 +42,9 @@ struct EmulatorConfig {
|
|||
bool audioEnabled = false;
|
||||
bool vsyncEnabled = true;
|
||||
|
||||
bool enableRenderdoc = false;
|
||||
bool printAppVersion = true;
|
||||
|
||||
bool chargerPlugged = true;
|
||||
// Default to 3% battery to make users suffer
|
||||
int batteryPercentage = 3;
|
||||
|
@ -33,7 +53,25 @@ struct EmulatorConfig {
|
|||
std::filesystem::path defaultRomPath = "";
|
||||
std::filesystem::path filePath;
|
||||
|
||||
// Frontend window settings
|
||||
struct WindowSettings {
|
||||
static constexpr int defaultX = 200;
|
||||
static constexpr int defaultY = 200;
|
||||
static constexpr int defaultWidth = 800;
|
||||
static constexpr int defaultHeight = 240 * 2;
|
||||
|
||||
bool rememberPosition = false; // Remember window position & size
|
||||
bool showAppVersion = false;
|
||||
|
||||
int x = defaultX;
|
||||
int y = defaultY;
|
||||
int width = defaultHeight;
|
||||
int height = defaultHeight;
|
||||
};
|
||||
|
||||
WindowSettings windowSettings;
|
||||
|
||||
EmulatorConfig(const std::filesystem::path& path);
|
||||
void load();
|
||||
void save();
|
||||
};
|
||||
};
|
||||
|
|
|
@ -1,20 +1,29 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <cstdint>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "io_file.hpp"
|
||||
#include "swap.hpp"
|
||||
|
||||
namespace Crypto {
|
||||
constexpr std::size_t AesKeySize = 0x10;
|
||||
constexpr usize AesKeySize = 0x10;
|
||||
using AESKey = std::array<u8, AesKeySize>;
|
||||
|
||||
template <std::size_t N>
|
||||
static std::array<u8, N> rolArray(const std::array<u8, N>& value, std::size_t bits) {
|
||||
struct Seed {
|
||||
u64_le titleID;
|
||||
AESKey seed;
|
||||
std::array<u8, 8> pad;
|
||||
};
|
||||
|
||||
template <usize N>
|
||||
static std::array<u8, N> rolArray(const std::array<u8, N>& value, usize bits) {
|
||||
const auto bitWidth = N * CHAR_BIT;
|
||||
|
||||
bits %= bitWidth;
|
||||
|
@ -24,18 +33,18 @@ namespace Crypto {
|
|||
|
||||
std::array<u8, N> result;
|
||||
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
for (usize i = 0; i < N; i++) {
|
||||
result[i] = ((value[(i + byteShift) % N] << bitShift) | (value[(i + byteShift + 1) % N] >> (CHAR_BIT - bitShift))) & UINT8_MAX;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
template <usize N>
|
||||
static std::array<u8, N> addArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
|
||||
std::array<u8, N> result;
|
||||
std::size_t sum = 0;
|
||||
std::size_t carry = 0;
|
||||
usize sum = 0;
|
||||
usize carry = 0;
|
||||
|
||||
for (std::int64_t i = N - 1; i >= 0; i--) {
|
||||
sum = a[i] + b[i] + carry;
|
||||
|
@ -46,11 +55,11 @@ namespace Crypto {
|
|||
return result;
|
||||
}
|
||||
|
||||
template <std::size_t N>
|
||||
template <usize N>
|
||||
static std::array<u8, N> xorArray(const std::array<u8, N>& a, const std::array<u8, N>& b) {
|
||||
std::array<u8, N> result;
|
||||
|
||||
for (std::size_t i = 0; i < N; i++) {
|
||||
for (usize i = 0; i < N; i++) {
|
||||
result[i] = a[i] ^ b[i];
|
||||
}
|
||||
|
||||
|
@ -63,7 +72,7 @@ namespace Crypto {
|
|||
}
|
||||
|
||||
AESKey rawKey;
|
||||
for (std::size_t i = 0; i < rawKey.size(); i++) {
|
||||
for (usize i = 0; i < rawKey.size(); i++) {
|
||||
rawKey[i] = static_cast<u8>(std::stoi(hex.substr(i * 2, 2), 0, 16));
|
||||
}
|
||||
|
||||
|
@ -76,7 +85,7 @@ namespace Crypto {
|
|||
std::optional<AESKey> normalKey = std::nullopt;
|
||||
};
|
||||
|
||||
enum KeySlotId : std::size_t {
|
||||
enum KeySlotId : usize {
|
||||
NCCHKey0 = 0x2C,
|
||||
NCCHKey1 = 0x25,
|
||||
NCCHKey2 = 0x18,
|
||||
|
@ -84,14 +93,17 @@ namespace Crypto {
|
|||
};
|
||||
|
||||
class AESEngine {
|
||||
private:
|
||||
constexpr static std::size_t AesKeySlotCount = 0x40;
|
||||
private:
|
||||
constexpr static usize AesKeySlotCount = 0x40;
|
||||
|
||||
std::optional<AESKey> m_generator = std::nullopt;
|
||||
std::array<AESKeySlot, AesKeySlotCount> m_slots;
|
||||
bool keysLoaded = false;
|
||||
|
||||
constexpr void updateNormalKey(std::size_t slotId) {
|
||||
std::vector<Seed> seeds;
|
||||
IOFile seedDatabase;
|
||||
|
||||
constexpr void updateNormalKey(usize slotId) {
|
||||
if (m_generator.has_value() && hasKeyX(slotId) && hasKeyY(slotId)) {
|
||||
auto& keySlot = m_slots.at(slotId);
|
||||
AESKey keyX = keySlot.keyX.value();
|
||||
|
@ -101,13 +113,17 @@ namespace Crypto {
|
|||
}
|
||||
}
|
||||
|
||||
public:
|
||||
public:
|
||||
AESEngine() {}
|
||||
void loadKeys(const std::filesystem::path& path);
|
||||
void setSeedPath(const std::filesystem::path& path);
|
||||
// Returns true on success, false on failure
|
||||
bool loadSeeds();
|
||||
|
||||
bool haveKeys() { return keysLoaded; }
|
||||
bool haveGenerator() { return m_generator.has_value(); }
|
||||
|
||||
constexpr bool hasKeyX(std::size_t slotId) {
|
||||
constexpr bool hasKeyX(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -115,18 +131,16 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).keyX.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getKeyX(std::size_t slotId) {
|
||||
return m_slots.at(slotId).keyX.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getKeyX(usize slotId) { return m_slots.at(slotId).keyX.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setKeyX(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setKeyX(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).keyX = key;
|
||||
updateNormalKey(slotId);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool hasKeyY(std::size_t slotId) {
|
||||
constexpr bool hasKeyY(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -134,18 +148,16 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).keyY.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getKeyY(std::size_t slotId) {
|
||||
return m_slots.at(slotId).keyY.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getKeyY(usize slotId) { return m_slots.at(slotId).keyY.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setKeyY(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setKeyY(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).keyY = key;
|
||||
updateNormalKey(slotId);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool hasNormalKey(std::size_t slotId) {
|
||||
constexpr bool hasNormalKey(usize slotId) {
|
||||
if (slotId >= AesKeySlotCount) {
|
||||
return false;
|
||||
}
|
||||
|
@ -153,14 +165,14 @@ namespace Crypto {
|
|||
return m_slots.at(slotId).normalKey.has_value();
|
||||
}
|
||||
|
||||
constexpr AESKey getNormalKey(std::size_t slotId) {
|
||||
return m_slots.at(slotId).normalKey.value_or(AESKey{});
|
||||
}
|
||||
constexpr AESKey getNormalKey(usize slotId) { return m_slots.at(slotId).normalKey.value_or(AESKey{}); }
|
||||
|
||||
constexpr void setNormalKey(std::size_t slotId, const AESKey &key) {
|
||||
constexpr void setNormalKey(usize slotId, const AESKey& key) {
|
||||
if (slotId < AesKeySlotCount) {
|
||||
m_slots.at(slotId).normalKey = key;
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<AESKey> getSeedFromDB(u64 titleID);
|
||||
};
|
||||
}
|
||||
} // namespace Crypto
|
||||
|
|
|
@ -135,4 +135,7 @@ class Emulator {
|
|||
std::filesystem::path getAppDataRoot();
|
||||
|
||||
std::span<u8> getSMDH();
|
||||
|
||||
private:
|
||||
void loadRenderdoc();
|
||||
};
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#pragma once
|
||||
#include "helpers.hpp"
|
||||
|
||||
using Handle = u32;
|
||||
using HorizonHandle = u32;
|
||||
|
||||
namespace KernelHandles {
|
||||
enum : u32 {
|
||||
|
@ -61,17 +61,17 @@ namespace KernelHandles {
|
|||
};
|
||||
|
||||
// Returns whether "handle" belongs to one of the OS services
|
||||
static constexpr bool isServiceHandle(Handle handle) {
|
||||
static constexpr bool isServiceHandle(HorizonHandle handle) {
|
||||
return handle >= MinServiceHandle && handle <= MaxServiceHandle;
|
||||
}
|
||||
|
||||
// Returns whether "handle" belongs to one of the OS services' shared memory areas
|
||||
static constexpr bool isSharedMemHandle(Handle handle) {
|
||||
static constexpr bool isSharedMemHandle(HorizonHandle handle) {
|
||||
return handle >= MinSharedMemHandle && handle <= MaxSharedMemHandle;
|
||||
}
|
||||
|
||||
// Returns the name of a handle as a string based on the given handle
|
||||
static const char* getServiceName(Handle handle) {
|
||||
static const char* getServiceName(HorizonHandle handle) {
|
||||
switch (handle) {
|
||||
case AC: return "AC";
|
||||
case ACT: return "ACT";
|
||||
|
|
|
@ -16,8 +16,11 @@
|
|||
#include "services/service_manager.hpp"
|
||||
|
||||
class CPU;
|
||||
struct Scheduler;
|
||||
|
||||
class Kernel {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
std::span<u32, 16> regs;
|
||||
CPU& cpu;
|
||||
Memory& mem;
|
||||
|
@ -247,6 +250,7 @@ public:
|
|||
|
||||
ServiceManager& getServiceManager() { return serviceManager; }
|
||||
KFcram& getFcramManager() { return fcramManager; }
|
||||
Scheduler& getScheduler();
|
||||
|
||||
void sendGPUInterrupt(GPUInterrupt type) { serviceManager.sendGPUInterrupt(type); }
|
||||
void clearInstructionCache();
|
||||
|
|
|
@ -53,7 +53,7 @@ enum class FcramRegion {
|
|||
struct AddressArbiter {};
|
||||
|
||||
struct ResourceLimits {
|
||||
Handle handle;
|
||||
HorizonHandle handle;
|
||||
|
||||
s32 currentCommit = 0;
|
||||
};
|
||||
|
@ -97,6 +97,8 @@ struct Port {
|
|||
};
|
||||
|
||||
struct Session {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle portHandle; // The port this session is subscribed to
|
||||
Session(Handle portHandle) : portHandle(portHandle) {}
|
||||
};
|
||||
|
@ -115,6 +117,8 @@ enum class ThreadStatus {
|
|||
};
|
||||
|
||||
struct Thread {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u32 initialSP; // Initial r13 value
|
||||
u32 entrypoint; // Initial r15 value
|
||||
u32 priority;
|
||||
|
@ -167,6 +171,8 @@ static const char* kernelObjectTypeToString(KernelObjectType t) {
|
|||
}
|
||||
|
||||
struct Mutex {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u64 waitlist; // Refer to the getWaitlist function below for documentation
|
||||
Handle ownerThread = 0; // Index of the thread that holds the mutex if it's locked
|
||||
Handle handle; // Handle of the mutex itself
|
||||
|
@ -209,6 +215,8 @@ struct MemoryBlock {
|
|||
|
||||
// Generic kernel object class
|
||||
struct KernelObject {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = 0; // A u32 the OS will use to identify objects
|
||||
void* data = nullptr;
|
||||
KernelObjectType type;
|
||||
|
|
|
@ -50,6 +50,7 @@ struct NCCH {
|
|||
|
||||
static constexpr u64 mediaUnit = 0x200;
|
||||
u64 size = 0; // Size of NCCH converted to bytes
|
||||
u64 saveDataSize = 0;
|
||||
u32 stackSize = 0;
|
||||
u32 bssSize = 0;
|
||||
u32 exheaderSize = 0;
|
||||
|
@ -60,10 +61,10 @@ struct NCCH {
|
|||
CodeSetInfo text, data, rodata;
|
||||
FSInfo partitionInfo;
|
||||
|
||||
std::optional<Crypto::AESKey> primaryKey, secondaryKey;
|
||||
|
||||
// Contents of the .code file in the ExeFS
|
||||
std::vector<u8> codeFile;
|
||||
// Contains of the cart's save data
|
||||
std::vector<u8> saveData;
|
||||
// The cart region. Only the CXI's region matters to us. Necessary to get past region locking
|
||||
std::optional<Regions> region = std::nullopt;
|
||||
std::vector<u8> smdh;
|
||||
|
@ -76,7 +77,7 @@ struct NCCH {
|
|||
bool hasExeFS() { return exeFS.size != 0; }
|
||||
bool hasRomFS() { return romFS.size != 0; }
|
||||
bool hasCode() { return codeFile.size() != 0; }
|
||||
bool hasSaveData() { return saveData.size() != 0; }
|
||||
bool hasSaveData() { return saveDataSize != 0; }
|
||||
|
||||
// Parse SMDH for region info and such. Returns false on failure, true on success
|
||||
bool parseSMDH(const std::vector<u8> &smdh);
|
||||
|
|
|
@ -114,6 +114,7 @@ class Memory {
|
|||
bool changeState = false;
|
||||
bool changePerms = false;
|
||||
};
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
u8* fcram;
|
||||
u8* dspRam; // Provided to us by Audio
|
||||
|
@ -222,8 +223,14 @@ private:
|
|||
u8* getFCRAM() { return fcram; }
|
||||
|
||||
enum class BatteryLevel {
|
||||
Empty = 0, AlmostEmpty, OneBar, TwoBars, ThreeBars, FourBars
|
||||
Empty = 0,
|
||||
AlmostEmpty,
|
||||
OneBar,
|
||||
TwoBars,
|
||||
ThreeBars,
|
||||
FourBars,
|
||||
};
|
||||
|
||||
u8 getBatteryState(bool adapterConnected, bool charging, BatteryLevel batteryLevel) {
|
||||
u8 value = static_cast<u8>(batteryLevel) << 2; // Bits 2:4 are the battery level from 0 to 5
|
||||
if (adapterConnected) value |= 1 << 0; // Bit 0 shows if the charger is connected
|
||||
|
@ -287,5 +294,5 @@ private:
|
|||
|
||||
bool allocateMainThreadStack(u32 size);
|
||||
Regions getConsoleRegion();
|
||||
void copySharedFont(u8* ptr);
|
||||
void copySharedFont(u8* ptr, u32 vaddr);
|
||||
};
|
||||
|
|
|
@ -1,6 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <QAction>
|
||||
#include <QCheckBox>
|
||||
#include <QDialog>
|
||||
#include <QLabel>
|
||||
#include <QLineEdit>
|
||||
#include <QListWidget>
|
||||
#include <QPushButton>
|
||||
#include <QTextEdit>
|
||||
#include <QWidget>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
|
@ -24,3 +31,60 @@ class CheatsWindow final : public QWidget {
|
|||
std::filesystem::path cheatPath;
|
||||
Emulator* emu;
|
||||
};
|
||||
|
||||
struct CheatMetadata {
|
||||
u32 handle = Cheats::badCheatHandle;
|
||||
std::string name = "New cheat";
|
||||
std::string code;
|
||||
bool enabled = true;
|
||||
};
|
||||
|
||||
class CheatEntryWidget : public QWidget {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
CheatEntryWidget(Emulator* emu, CheatMetadata metadata, QListWidget* parent);
|
||||
|
||||
void Update() {
|
||||
name->setText(metadata.name.c_str());
|
||||
enabled->setChecked(metadata.enabled);
|
||||
update();
|
||||
}
|
||||
|
||||
void Remove() {
|
||||
emu->getCheats().removeCheat(metadata.handle);
|
||||
cheatList->takeItem(cheatList->row(listItem));
|
||||
deleteLater();
|
||||
}
|
||||
|
||||
const CheatMetadata& getMetadata() { return metadata; }
|
||||
void setMetadata(const CheatMetadata& metadata) { this->metadata = metadata; }
|
||||
|
||||
private:
|
||||
void checkboxChanged(int state);
|
||||
void editClicked();
|
||||
|
||||
Emulator* emu;
|
||||
CheatMetadata metadata;
|
||||
u32 handle;
|
||||
QLabel* name;
|
||||
QCheckBox* enabled;
|
||||
QListWidget* cheatList;
|
||||
QListWidgetItem* listItem;
|
||||
};
|
||||
|
||||
class CheatEditDialog : public QDialog {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
CheatEditDialog(Emulator* emu, CheatEntryWidget& cheatEntry);
|
||||
|
||||
void accepted();
|
||||
void rejected();
|
||||
|
||||
private:
|
||||
Emulator* emu;
|
||||
CheatEntryWidget& cheatEntry;
|
||||
QTextEdit* codeEdit;
|
||||
QLineEdit* nameEdit;
|
||||
};
|
21
include/panda_qt/elided_label.hpp
Normal file
|
@ -0,0 +1,21 @@
|
|||
#pragma once
|
||||
#include <QFontMetrics>
|
||||
#include <QLabel>
|
||||
#include <QString>
|
||||
#include <QWidget>
|
||||
|
||||
class ElidedLabel : public QLabel {
|
||||
Q_OBJECT
|
||||
public:
|
||||
explicit ElidedLabel(Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
|
||||
explicit ElidedLabel(QString text, Qt::TextElideMode elideMode = Qt::ElideLeft, QWidget* parent = nullptr);
|
||||
void setText(QString text);
|
||||
|
||||
protected:
|
||||
void resizeEvent(QResizeEvent* event);
|
||||
|
||||
private:
|
||||
void updateText();
|
||||
QString m_text;
|
||||
Qt::TextElideMode m_elideMode;
|
||||
};
|
|
@ -17,7 +17,9 @@
|
|||
#include "panda_qt/about_window.hpp"
|
||||
#include "panda_qt/cheats_window.hpp"
|
||||
#include "panda_qt/config_window.hpp"
|
||||
#include "panda_qt/patch_window.hpp"
|
||||
#include "panda_qt/screen.hpp"
|
||||
#include "panda_qt/shader_editor.hpp"
|
||||
#include "panda_qt/text_editor.hpp"
|
||||
#include "services/hid.hpp"
|
||||
|
||||
|
@ -47,6 +49,8 @@ class MainWindow : public QMainWindow {
|
|||
EditCheat,
|
||||
PressTouchscreen,
|
||||
ReleaseTouchscreen,
|
||||
ReloadUbershader,
|
||||
SetScreenSize,
|
||||
};
|
||||
|
||||
// Tagged union representing our message queue messages
|
||||
|
@ -78,6 +82,11 @@ class MainWindow : public QMainWindow {
|
|||
u16 x;
|
||||
u16 y;
|
||||
} touchscreen;
|
||||
|
||||
struct {
|
||||
u32 width;
|
||||
u32 height;
|
||||
} screenSize;
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -90,13 +99,15 @@ class MainWindow : public QMainWindow {
|
|||
std::mutex messageQueueMutex;
|
||||
std::vector<EmulatorMessage> messageQueue;
|
||||
|
||||
QMenuBar* menuBar = nullptr;
|
||||
InputMappings keyboardMappings;
|
||||
ScreenWidget screen;
|
||||
ScreenWidget* screen;
|
||||
AboutWindow* aboutWindow;
|
||||
ConfigWindow* configWindow;
|
||||
CheatsWindow* cheatsEditor;
|
||||
TextEditorWindow* luaEditor;
|
||||
QMenuBar* menuBar = nullptr;
|
||||
PatchWindow* patchWindow;
|
||||
ShaderEditorWindow* shaderEditor;
|
||||
|
||||
// We use SDL's game controller API since it's the sanest API that supports as many controllers as possible
|
||||
SDL_GameController* gameController = nullptr;
|
||||
|
@ -108,17 +119,17 @@ class MainWindow : public QMainWindow {
|
|||
void selectROM();
|
||||
void dumpDspFirmware();
|
||||
void dumpRomFS();
|
||||
void openLuaEditor();
|
||||
void openCheatsEditor();
|
||||
void showAboutMenu();
|
||||
void initControllers();
|
||||
void pollControllers();
|
||||
void setupControllerSensors(SDL_GameController* controller);
|
||||
void sendMessage(const EmulatorMessage& message);
|
||||
void dispatchMessage(const EmulatorMessage& message);
|
||||
|
||||
// Tracks whether we are using an OpenGL-backed renderer or a Vulkan-backed renderer
|
||||
bool usingGL = false;
|
||||
bool usingVk = false;
|
||||
bool usingMtl = false;
|
||||
|
||||
// Variables to keep track of whether the user is controlling the 3DS analog stick with their keyboard
|
||||
// This is done so when a gamepad is connected, we won't automatically override the 3DS analog stick settings with the gamepad's state
|
||||
|
@ -130,11 +141,15 @@ class MainWindow : public QMainWindow {
|
|||
MainWindow(QApplication* app, QWidget* parent = nullptr);
|
||||
~MainWindow();
|
||||
|
||||
void closeEvent(QCloseEvent* event) override;
|
||||
void keyPressEvent(QKeyEvent* event) override;
|
||||
void keyReleaseEvent(QKeyEvent* event) override;
|
||||
void mousePressEvent(QMouseEvent* event) override;
|
||||
void mouseReleaseEvent(QMouseEvent* event) override;
|
||||
|
||||
void loadLuaScript(const std::string& code);
|
||||
void reloadShader(const std::string& shader);
|
||||
void editCheat(u32 handle, const std::vector<uint8_t>& cheat, const std::function<void(u32)>& callback);
|
||||
|
||||
void handleScreenResize(u32 width, u32 height);
|
||||
};
|
||||
|
|
31
include/panda_qt/patch_window.hpp
Normal file
|
@ -0,0 +1,31 @@
|
|||
#pragma once
|
||||
#include <QLabel>
|
||||
#include <QMessageBox>
|
||||
#include <QWidget>
|
||||
#include <filesystem>
|
||||
|
||||
#include "panda_qt/elided_label.hpp"
|
||||
|
||||
class PatchWindow final : public QWidget {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
PatchWindow(QWidget* parent = nullptr);
|
||||
~PatchWindow() = default;
|
||||
|
||||
private:
|
||||
// Show a message box
|
||||
// Title: Title of the message box to display
|
||||
// Message: Message to display
|
||||
// Icon: The type of icon (error, warning, information, etc) to display
|
||||
// IconPath: If non-null, then a path to an icon in our assets to display on the OK button
|
||||
void displayMessage(
|
||||
const QString& title, const QString& message, QMessageBox::Icon icon = QMessageBox::Icon::Warning, const char* iconPath = nullptr
|
||||
);
|
||||
|
||||
std::filesystem::path inputPath = "";
|
||||
std::filesystem::path patchPath = "";
|
||||
|
||||
ElidedLabel* inputPathLabel = nullptr;
|
||||
ElidedLabel* patchPathLabel = nullptr;
|
||||
};
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <QWidget>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
#include "gl/context.h"
|
||||
|
@ -10,15 +11,28 @@ class ScreenWidget : public QWidget {
|
|||
Q_OBJECT
|
||||
|
||||
public:
|
||||
ScreenWidget(QWidget* parent = nullptr);
|
||||
using ResizeCallback = std::function<void(u32, u32)>;
|
||||
|
||||
ScreenWidget(ResizeCallback resizeCallback, QWidget* parent = nullptr);
|
||||
void resizeEvent(QResizeEvent* event) override;
|
||||
// Called by the emulator thread for resizing the actual GL surface, since the emulator thread owns the GL context
|
||||
void resizeSurface(u32 width, u32 height);
|
||||
|
||||
GL::Context* getGLContext() { return glContext.get(); }
|
||||
|
||||
// Dimensions of our output surface
|
||||
u32 surfaceWidth = 0;
|
||||
u32 surfaceHeight = 0;
|
||||
WindowInfo windowInfo;
|
||||
|
||||
// Cached "previous" dimensions, used when resizing our window
|
||||
u32 previousWidth = 0;
|
||||
u32 previousHeight = 0;
|
||||
|
||||
private:
|
||||
std::unique_ptr<GL::Context> glContext = nullptr;
|
||||
ResizeCallback resizeCallback;
|
||||
|
||||
bool createGLContext();
|
||||
|
||||
qreal devicePixelRatioFromScreen() const;
|
||||
|
|
27
include/panda_qt/shader_editor.hpp
Normal file
|
@ -0,0 +1,27 @@
|
|||
#pragma once
|
||||
|
||||
#include <QApplication>
|
||||
#include <QDialog>
|
||||
#include <QWidget>
|
||||
#include <string>
|
||||
|
||||
#include "zep.h"
|
||||
#include "zep/mode_repl.h"
|
||||
#include "zep/regress.h"
|
||||
|
||||
class ShaderEditorWindow : public QDialog {
|
||||
Q_OBJECT
|
||||
|
||||
private:
|
||||
Zep::ZepWidget_Qt zepWidget;
|
||||
Zep::IZepReplProvider replProvider;
|
||||
static constexpr float fontSize = 14.0f;
|
||||
|
||||
public:
|
||||
// Whether this backend supports shader editor
|
||||
bool supported = true;
|
||||
|
||||
ShaderEditorWindow(QWidget* parent, const std::string& filename, const std::string& initialText);
|
||||
void setText(const std::string& text) { zepWidget.GetEditor().GetMRUBuffer()->SetText(text); }
|
||||
void setEnable(bool enable);
|
||||
};
|
|
@ -23,6 +23,8 @@ class FrontendSDL {
|
|||
SDL_GameController* gameController = nullptr;
|
||||
InputMappings keyboardMappings;
|
||||
|
||||
u32 windowWidth = 400;
|
||||
u32 windowHeight = 480;
|
||||
int gameControllerID;
|
||||
bool programRunning = true;
|
||||
|
||||
|
@ -35,4 +37,6 @@ class FrontendSDL {
|
|||
// And so the user can still use the keyboard to control the analog
|
||||
bool keyboardAnalogX = false;
|
||||
bool keyboardAnalogY = false;
|
||||
|
||||
void setupControllerSensors(SDL_GameController* controller);
|
||||
};
|
69
include/renderdoc.hpp
Normal file
|
@ -0,0 +1,69 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#ifdef PANDA3DS_ENABLE_RENDERDOC
|
||||
namespace Renderdoc {
|
||||
// Loads renderdoc dynamic library module.
|
||||
void loadRenderdoc();
|
||||
|
||||
// Begins a capture if a renderdoc instance is attached.
|
||||
void startCapture();
|
||||
|
||||
// Ends current renderdoc capture.
|
||||
void endCapture();
|
||||
|
||||
// Triggers capturing process.
|
||||
void triggerCapture();
|
||||
|
||||
// Sets output directory for captures
|
||||
void setOutputDir(const std::string& path, const std::string& prefix);
|
||||
|
||||
// Returns whether we've compiled with Renderdoc support
|
||||
static constexpr bool isSupported() { return true; }
|
||||
} // namespace Renderdoc
|
||||
#else
|
||||
namespace Renderdoc {
|
||||
static void loadRenderdoc() {}
|
||||
static void startCapture() { Helpers::panic("Tried to start a Renderdoc capture while support for renderdoc is disabled") }
|
||||
static void endCapture() { Helpers::panic("Tried to end a Renderdoc capture while support for renderdoc is disabled") }
|
||||
static void triggerCapture() { Helpers::panic("Tried to trigger a Renderdoc capture while support for renderdoc is disabled") }
|
||||
static void setOutputDir(const std::string& path, const std::string& prefix) {}
|
||||
static constexpr bool isSupported() { return false; }
|
||||
} // namespace Renderdoc
|
||||
#endif
|
||||
|
||||
namespace Renderdoc {
|
||||
// RAII scope class that encloses a Renderdoc capture, as long as it's triggered by triggerCapture
|
||||
struct Scope {
|
||||
Scope() { Renderdoc::startCapture(); }
|
||||
~Scope() { Renderdoc::endCapture(); }
|
||||
|
||||
Scope(const Scope&) = delete;
|
||||
Scope& operator=(const Scope&) = delete;
|
||||
|
||||
Scope(Scope&&) = delete;
|
||||
Scope& operator=(const Scope&&) = delete;
|
||||
};
|
||||
|
||||
// RAII scope class that encloses a Renderdoc capture. Unlike regular Scope it doesn't wait for a trigger, it will always issue the capture
|
||||
// trigger on its own and take a capture
|
||||
struct InstantScope {
|
||||
InstantScope() {
|
||||
Renderdoc::triggerCapture();
|
||||
Renderdoc::startCapture();
|
||||
}
|
||||
|
||||
~InstantScope() { Renderdoc::endCapture(); }
|
||||
|
||||
InstantScope(const InstantScope&) = delete;
|
||||
InstantScope& operator=(const InstantScope&) = delete;
|
||||
|
||||
InstantScope(InstantScope&&) = delete;
|
||||
InstantScope& operator=(const InstantScope&&) = delete;
|
||||
};
|
||||
} // namespace Renderdoc
|
|
@ -1,8 +1,10 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <span>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <string>
|
||||
|
||||
#include "PICA/draw_acceleration.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
@ -16,12 +18,16 @@ enum class RendererType : s8 {
|
|||
Null = 0,
|
||||
OpenGL = 1,
|
||||
Vulkan = 2,
|
||||
Software = 3,
|
||||
Metal = 3,
|
||||
Software = 4,
|
||||
};
|
||||
|
||||
class GPU;
|
||||
struct EmulatorConfig;
|
||||
struct SDL_Window;
|
||||
|
||||
class GPU;
|
||||
class ShaderUnit;
|
||||
|
||||
class Renderer {
|
||||
protected:
|
||||
GPU& gpu;
|
||||
|
@ -45,6 +51,8 @@ class Renderer {
|
|||
u32 outputWindowWidth = 400;
|
||||
u32 outputWindowHeight = 240 * 2;
|
||||
|
||||
EmulatorConfig* emulatorConfig = nullptr;
|
||||
|
||||
public:
|
||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
virtual ~Renderer();
|
||||
|
@ -66,6 +74,19 @@ class Renderer {
|
|||
// This function does things like write back or cache necessary state before we delete our context
|
||||
virtual void deinitGraphicsContext() = 0;
|
||||
|
||||
// Functions for hooking up the renderer core to the frontend's shader editor for editing ubershaders in real time
|
||||
// SupportsShaderReload: Indicates whether the backend offers ubershader reload support or not
|
||||
// GetUbershader/SetUbershader: Gets or sets the renderer's current ubershader
|
||||
virtual bool supportsShaderReload() { return false; }
|
||||
virtual std::string getUbershader() { return ""; }
|
||||
virtual void setUbershader(const std::string& shader) {}
|
||||
|
||||
// This function is called on every draw call before parsing vertex data.
|
||||
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
|
||||
// ubershaders and shadergen, and so on.
|
||||
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
|
||||
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) { return false; }
|
||||
|
||||
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }
|
||||
|
@ -91,4 +112,6 @@ class Renderer {
|
|||
outputWindowWidth = width;
|
||||
outputWindowHeight = height;
|
||||
}
|
||||
|
||||
void setConfig(EmulatorConfig* config) { emulatorConfig = config; }
|
||||
};
|
||||
|
|
12
include/renderer_gl/gl_driver.hpp
Normal file
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
// Information about our OpenGL/OpenGL ES driver that we should keep track of
|
||||
// Stuff like whether specific extensions are supported, and potentially things like OpenGL context information
|
||||
namespace OpenGL {
|
||||
struct Driver {
|
||||
bool supportsExtFbFetch = false;
|
||||
bool supportsArmFbFetch = false;
|
||||
|
||||
bool supportFbFetch() const { return supportsExtFbFetch || supportsArmFbFetch; }
|
||||
};
|
||||
} // namespace OpenGL
|
|
@ -38,11 +38,14 @@ struct GLStateManager {
|
|||
|
||||
GLuint stencilMask;
|
||||
GLuint boundVAO;
|
||||
GLuint boundVBO;
|
||||
GLuint currentProgram;
|
||||
GLuint boundUBO;
|
||||
|
||||
GLenum depthFunc;
|
||||
GLenum logicOp;
|
||||
GLenum blendEquationRGB, blendEquationAlpha;
|
||||
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
|
||||
GLenum blendFuncDestRGB, blendFuncDestAlpha;
|
||||
|
||||
void reset();
|
||||
void resetBlend();
|
||||
|
@ -51,7 +54,7 @@ struct GLStateManager {
|
|||
void resetColourMask();
|
||||
void resetDepth();
|
||||
void resetVAO();
|
||||
void resetVBO();
|
||||
void resetBuffers();
|
||||
void resetProgram();
|
||||
void resetScissor();
|
||||
void resetStencil();
|
||||
|
@ -169,13 +172,6 @@ struct GLStateManager {
|
|||
}
|
||||
}
|
||||
|
||||
void bindVBO(GLuint handle) {
|
||||
if (boundVBO != handle) {
|
||||
boundVBO = handle;
|
||||
glBindBuffer(GL_ARRAY_BUFFER, handle);
|
||||
}
|
||||
}
|
||||
|
||||
void useProgram(GLuint handle) {
|
||||
if (currentProgram != handle) {
|
||||
currentProgram = handle;
|
||||
|
@ -183,8 +179,14 @@ struct GLStateManager {
|
|||
}
|
||||
}
|
||||
|
||||
void bindUBO(GLuint handle) {
|
||||
if (boundUBO != handle) {
|
||||
boundUBO = handle;
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, boundUBO);
|
||||
}
|
||||
}
|
||||
|
||||
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
|
||||
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
|
||||
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
|
||||
|
||||
void setColourMask(bool r, bool g, bool b, bool a) {
|
||||
|
@ -224,6 +226,41 @@ struct GLStateManager {
|
|||
}
|
||||
|
||||
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
|
||||
|
||||
// Counterpart to glBlendEquationSeparate
|
||||
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
|
||||
if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) {
|
||||
blendEquationRGB = modeRGB;
|
||||
blendEquationAlpha = modeAlpha;
|
||||
|
||||
glBlendEquationSeparate(modeRGB, modeAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
// Counterpart to glBlendFuncSeparate
|
||||
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
|
||||
if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha ||
|
||||
blendFuncDestAlpha != destAlpha) {
|
||||
|
||||
blendFuncSourceRGB = sourceRGB;
|
||||
blendFuncDestRGB = destRGB;
|
||||
blendFuncSourceAlpha = sourceAlpha;
|
||||
blendFuncDestAlpha = destAlpha;
|
||||
|
||||
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha);
|
||||
}
|
||||
}
|
||||
|
||||
// Counterpart to regular glBlendEquation
|
||||
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }
|
||||
|
||||
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
|
||||
setBlendEquation(static_cast<GLenum>(modeRGB), static_cast<GLenum>(modeAlpha));
|
||||
}
|
||||
|
||||
void setBlendEquation(OpenGL::BlendEquation mode) {
|
||||
setBlendEquation(static_cast<GLenum>(mode));
|
||||
}
|
||||
};
|
||||
|
||||
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");
|
||||
|
|
|
@ -1,11 +1,23 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/pica_vert_config.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen.hpp"
|
||||
#include "gl/stream_buffer.h"
|
||||
#include "gl_driver.hpp"
|
||||
#include "gl_state.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
|
@ -22,27 +34,48 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::Program triangleProgram;
|
||||
OpenGL::Program displayProgram;
|
||||
|
||||
OpenGL::VertexArray vao;
|
||||
// VAO for when not using accelerated vertex shaders. Contains attribute declarations matching to the PICA fixed function fragment attributes
|
||||
OpenGL::VertexArray defaultVAO;
|
||||
// VAO for when using accelerated vertex shaders. The PICA vertex shader inputs are passed as attributes without CPU processing.
|
||||
OpenGL::VertexArray hwShaderVAO;
|
||||
OpenGL::VertexBuffer vbo;
|
||||
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
// Data
|
||||
struct {
|
||||
// TEV configuration uniform locations
|
||||
GLint textureEnvSourceLoc = -1;
|
||||
GLint textureEnvOperandLoc = -1;
|
||||
GLint textureEnvCombinerLoc = -1;
|
||||
GLint textureEnvColorLoc = -1;
|
||||
GLint textureEnvScaleLoc = -1;
|
||||
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
// Uniform of PICA registers
|
||||
GLint picaRegLoc = -1;
|
||||
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
// Depth configuration uniform locations
|
||||
GLint depthOffsetLoc = -1;
|
||||
GLint depthScaleLoc = -1;
|
||||
GLint depthmapEnableLoc = -1;
|
||||
} ubershaderData;
|
||||
|
||||
float oldDepthScale = -1.0;
|
||||
float oldDepthOffset = 0.0;
|
||||
bool oldDepthmapEnable = false;
|
||||
// Set by prepareForDraw, tells us whether the current draw is using hw-accelerated shader
|
||||
bool usingAcceleratedShader = false;
|
||||
bool performIndexedRender = false;
|
||||
bool usingShortIndices = false;
|
||||
|
||||
// Set by prepareForDraw, metadata for indexed renders
|
||||
GLuint minimumIndex = 0;
|
||||
GLuint maximumIndex = 0;
|
||||
void* hwIndexBufferOffset = nullptr;
|
||||
|
||||
// When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw
|
||||
u32 previousAttributeMask = 0;
|
||||
|
||||
// Cached pointer to the current vertex shader when using HW accelerated shaders
|
||||
OpenGL::Shader* generatedVertexShader = nullptr;
|
||||
|
||||
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
||||
|
@ -53,25 +86,81 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::VertexBuffer dummyVBO;
|
||||
|
||||
OpenGL::Texture screenTexture;
|
||||
GLuint lightLUTTextureArray;
|
||||
OpenGL::Texture LUTTexture;
|
||||
OpenGL::Framebuffer screenFramebuffer;
|
||||
OpenGL::Texture blankTexture;
|
||||
// The "default" vertex shader to use when using specialized shaders but not PICA vertex shader -> GLSL recompilation
|
||||
// We can compile this once and then link it with all other generated fragment shaders
|
||||
OpenGL::Shader defaultShadergenVs;
|
||||
GLuint shadergenFragmentUBO;
|
||||
// UBO for uploading the PICA uniforms when using hw shaders
|
||||
GLuint hwShaderUniformUBO;
|
||||
|
||||
using StreamBuffer = OpenGLStreamBuffer;
|
||||
std::unique_ptr<StreamBuffer> hwVertexBuffer;
|
||||
std::unique_ptr<StreamBuffer> hwIndexBuffer;
|
||||
|
||||
// Cache of fixed attribute values so that we don't do any duplicate updates
|
||||
std::array<std::array<float, 4>, 16> fixedAttrValues;
|
||||
|
||||
// Cached recompiled fragment shader
|
||||
struct CachedProgram {
|
||||
OpenGL::Program program;
|
||||
};
|
||||
|
||||
struct ShaderCache {
|
||||
std::unordered_map<PICA::VertConfig, std::optional<OpenGL::Shader>> vertexShaderCache;
|
||||
std::unordered_map<PICA::FragmentConfig, OpenGL::Shader> fragmentShaderCache;
|
||||
|
||||
// Program cache indexed by GLuints for the vertex and fragment shader to use
|
||||
// Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint
|
||||
std::unordered_map<u64, CachedProgram> programCache;
|
||||
|
||||
void clear() {
|
||||
for (auto& it : programCache) {
|
||||
CachedProgram& cachedProgram = it.second;
|
||||
cachedProgram.program.free();
|
||||
}
|
||||
|
||||
for (auto& it : vertexShaderCache) {
|
||||
if (it.second.has_value()) {
|
||||
it.second->free();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& it : fragmentShaderCache) {
|
||||
it.second.free();
|
||||
}
|
||||
|
||||
programCache.clear();
|
||||
vertexShaderCache.clear();
|
||||
fragmentShaderCache.clear();
|
||||
}
|
||||
};
|
||||
ShaderCache shaderCache;
|
||||
|
||||
OpenGL::Framebuffer getColourFBO();
|
||||
OpenGL::Texture getTexture(Texture& tex);
|
||||
OpenGL::Program& getSpecializedShader();
|
||||
|
||||
PICA::ShaderGen::FragmentGenerator fragShaderGen;
|
||||
OpenGL::Driver driverInfo;
|
||||
|
||||
MAKE_LOG_FUNCTION(log, rendererLogger)
|
||||
void setupBlending();
|
||||
void setupStencilTest(bool stencilEnable);
|
||||
void bindDepthBuffer();
|
||||
void setupTextureEnvState();
|
||||
void setupUbershaderTexEnv();
|
||||
void bindTexturesToSlots();
|
||||
void updateLightingLUT();
|
||||
void updateFogLUT();
|
||||
void initGraphicsContextInternal();
|
||||
|
||||
void accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel);
|
||||
|
||||
public:
|
||||
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
|
||||
~RendererGL() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -82,12 +171,18 @@ class RendererGL final : public Renderer {
|
|||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
|
||||
void deinitGraphicsContext() override;
|
||||
|
||||
virtual bool supportsShaderReload() override { return true; }
|
||||
virtual std::string getUbershader() override;
|
||||
virtual void setUbershader(const std::string& shader) override;
|
||||
virtual bool prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) override;
|
||||
|
||||
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
||||
|
||||
// Note: The caller is responsible for deleting the currently bound FBO before calling this
|
||||
void setFBO(uint handle) { screenFramebuffer.m_handle = handle; }
|
||||
void resetStateManager() { gl.reset(); }
|
||||
void initUbershader(OpenGL::Program& program);
|
||||
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
|
||||
|
@ -95,4 +190,4 @@ class RendererGL final : public Renderer {
|
|||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
|
@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
|
|||
class SurfaceCache {
|
||||
// Vanilla std::optional can't hold actual references
|
||||
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
||||
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
|
||||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
|
||||
|
||||
size_t size;
|
||||
size_t evictionIndex;
|
||||
|
|
74
include/renderer_mtl/mtl_blit_pipeline_cache.hpp
Normal file
|
@ -0,0 +1,74 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "objc_helper.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct BlitPipelineHash {
|
||||
// Formats
|
||||
ColorFmt colorFmt;
|
||||
DepthFmt depthFmt;
|
||||
};
|
||||
|
||||
// This pipeline only caches the pipeline with all of its color and depth attachment variations
|
||||
class BlitPipelineCache {
|
||||
public:
|
||||
BlitPipelineCache() = default;
|
||||
|
||||
~BlitPipelineCache() {
|
||||
reset();
|
||||
vertexFunction->release();
|
||||
fragmentFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Function* vert, MTL::Function* frag) {
|
||||
device = dev;
|
||||
vertexFunction = vert;
|
||||
fragmentFunction = frag;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(BlitPipelineHash hash) {
|
||||
u8 intHash = ((u8)hash.colorFmt << 3) | (u8)hash.depthFmt;
|
||||
auto& pipeline = pipelineCache[intHash];
|
||||
if (!pipeline) {
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
|
||||
auto colorAttachment = desc->colorAttachments()->object(0);
|
||||
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
|
||||
|
||||
desc->setDepthAttachmentPixelFormat(toMTLPixelFormatDepth(hash.depthFmt));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
desc->setLabel(toNSString("Blit pipeline"));
|
||||
pipeline = device->newRenderPipelineState(desc, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating blit pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
|
||||
desc->release();
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : pipelineCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<u8, MTL::RenderPipelineState*> pipelineCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::Function* fragmentFunction;
|
||||
};
|
||||
} // namespace Metal
|
56
include/renderer_mtl/mtl_command_encoder.hpp
Normal file
|
@ -0,0 +1,56 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
namespace Metal {
|
||||
struct RenderState {
|
||||
MTL::RenderPipelineState* renderPipelineState = nullptr;
|
||||
MTL::DepthStencilState* depthStencilState = nullptr;
|
||||
MTL::Texture* textures[3] = {nullptr};
|
||||
MTL::SamplerState* samplerStates[3] = {nullptr};
|
||||
};
|
||||
|
||||
class CommandEncoder {
|
||||
public:
|
||||
void newRenderCommandEncoder(MTL::RenderCommandEncoder* rce) {
|
||||
renderCommandEncoder = rce;
|
||||
|
||||
// Reset the render state
|
||||
renderState = RenderState{};
|
||||
}
|
||||
|
||||
// Resource binding
|
||||
void setRenderPipelineState(MTL::RenderPipelineState* renderPipelineState) {
|
||||
if (renderPipelineState != renderState.renderPipelineState) {
|
||||
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
|
||||
renderState.renderPipelineState = renderPipelineState;
|
||||
}
|
||||
}
|
||||
|
||||
void setDepthStencilState(MTL::DepthStencilState* depthStencilState) {
|
||||
if (depthStencilState != renderState.depthStencilState) {
|
||||
renderCommandEncoder->setDepthStencilState(depthStencilState);
|
||||
renderState.depthStencilState = depthStencilState;
|
||||
}
|
||||
}
|
||||
|
||||
void setFragmentTexture(MTL::Texture* texture, u32 index) {
|
||||
if (texture != renderState.textures[index]) {
|
||||
renderCommandEncoder->setFragmentTexture(texture, index);
|
||||
renderState.textures[index] = texture;
|
||||
}
|
||||
}
|
||||
|
||||
void setFragmentSamplerState(MTL::SamplerState* samplerState, u32 index) {
|
||||
if (samplerState != renderState.samplerStates[index]) {
|
||||
renderCommandEncoder->setFragmentSamplerState(samplerState, index);
|
||||
renderState.samplerStates[index] = samplerState;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
|
||||
|
||||
RenderState renderState;
|
||||
};
|
||||
} // namespace Metal
|
6
include/renderer_mtl/mtl_common.hpp
Normal file
|
@ -0,0 +1,6 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#define GET_HELPER_TEXTURE_BINDING(binding) (30 - binding)
|
||||
#define GET_HELPER_SAMPLER_STATE_BINDING(binding) (15 - binding)
|
80
include/renderer_mtl/mtl_depth_stencil_cache.hpp
Normal file
|
@ -0,0 +1,80 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct DepthStencilHash {
|
||||
u32 stencilConfig;
|
||||
u16 stencilOpConfig;
|
||||
bool depthStencilWrite;
|
||||
u8 depthFunc;
|
||||
};
|
||||
|
||||
class DepthStencilCache {
|
||||
public:
|
||||
DepthStencilCache() = default;
|
||||
|
||||
~DepthStencilCache() { reset(); }
|
||||
|
||||
void set(MTL::Device* dev) { device = dev; }
|
||||
|
||||
MTL::DepthStencilState* get(DepthStencilHash hash) {
|
||||
u64 intHash =
|
||||
((u64)hash.depthStencilWrite << 56) | ((u64)hash.depthFunc << 48) | ((u64)hash.stencilConfig << 16) | (u64)hash.stencilOpConfig;
|
||||
auto& depthStencilState = depthStencilCache[intHash];
|
||||
if (!depthStencilState) {
|
||||
MTL::DepthStencilDescriptor* desc = MTL::DepthStencilDescriptor::alloc()->init();
|
||||
desc->setDepthWriteEnabled(hash.depthStencilWrite);
|
||||
desc->setDepthCompareFunction(toMTLCompareFunc(hash.depthFunc));
|
||||
|
||||
const bool stencilEnable = Helpers::getBit<0>(hash.stencilConfig);
|
||||
MTL::StencilDescriptor* stencilDesc = nullptr;
|
||||
if (stencilEnable) {
|
||||
const u8 stencilFunc = Helpers::getBits<4, 3>(hash.stencilConfig);
|
||||
const u8 stencilRefMask = Helpers::getBits<24, 8>(hash.stencilConfig);
|
||||
|
||||
const u32 stencilBufferMask = hash.depthStencilWrite ? Helpers::getBits<8, 8>(hash.stencilConfig) : 0;
|
||||
|
||||
const u8 stencilFailOp = Helpers::getBits<0, 3>(hash.stencilOpConfig);
|
||||
const u8 depthFailOp = Helpers::getBits<4, 3>(hash.stencilOpConfig);
|
||||
const u8 passOp = Helpers::getBits<8, 3>(hash.stencilOpConfig);
|
||||
|
||||
stencilDesc = MTL::StencilDescriptor::alloc()->init();
|
||||
stencilDesc->setStencilFailureOperation(toMTLStencilOperation(stencilFailOp));
|
||||
stencilDesc->setDepthFailureOperation(toMTLStencilOperation(depthFailOp));
|
||||
stencilDesc->setDepthStencilPassOperation(toMTLStencilOperation(passOp));
|
||||
stencilDesc->setStencilCompareFunction(toMTLCompareFunc(stencilFunc));
|
||||
stencilDesc->setReadMask(stencilRefMask);
|
||||
stencilDesc->setWriteMask(stencilBufferMask);
|
||||
|
||||
desc->setFrontFaceStencil(stencilDesc);
|
||||
desc->setBackFaceStencil(stencilDesc);
|
||||
}
|
||||
|
||||
depthStencilState = device->newDepthStencilState(desc);
|
||||
|
||||
desc->release();
|
||||
if (stencilDesc) {
|
||||
stencilDesc->release();
|
||||
}
|
||||
}
|
||||
|
||||
return depthStencilState;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : depthStencilCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
depthStencilCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<u64, MTL::DepthStencilState*> depthStencilCache;
|
||||
MTL::Device* device;
|
||||
};
|
||||
} // namespace Metal
|
162
include/renderer_mtl/mtl_draw_pipeline_cache.hpp
Normal file
|
@ -0,0 +1,162 @@
|
|||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "objc_helper.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct DrawFragmentFunctionHash {
|
||||
u32 lightingConfig1; // 32 bits (TODO: check this)
|
||||
bool lightingEnabled; // 1 bit
|
||||
u8 lightingNumLights; // 3 bits
|
||||
// | ref | func | on |
|
||||
u16 alphaControl; // 12 bits (mask: 11111111 0111 0001)
|
||||
};
|
||||
|
||||
inline bool operator<(const DrawFragmentFunctionHash& l, const DrawFragmentFunctionHash& r) {
|
||||
if (!l.lightingEnabled && r.lightingEnabled) return true;
|
||||
if (l.lightingNumLights < r.lightingNumLights) return true;
|
||||
if (l.lightingConfig1 < r.lightingConfig1) return true;
|
||||
if (l.alphaControl < r.alphaControl) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
struct DrawPipelineHash { // 56 bits
|
||||
// Formats
|
||||
ColorFmt colorFmt; // 3 bits
|
||||
DepthFmt depthFmt; // 3 bits
|
||||
|
||||
// Blending
|
||||
bool blendEnabled; // 1 bit
|
||||
// | functions | aeq | ceq |
|
||||
u32 blendControl; // 22 bits (mask: 1111111111111111 00000111 00000111)
|
||||
u8 colorWriteMask; // 4 bits
|
||||
|
||||
DrawFragmentFunctionHash fragHash;
|
||||
};
|
||||
|
||||
inline bool operator<(const DrawPipelineHash& l, const DrawPipelineHash& r) {
|
||||
if ((u32)l.colorFmt < (u32)r.colorFmt) return true;
|
||||
if ((u32)l.depthFmt < (u32)r.depthFmt) return true;
|
||||
if (!l.blendEnabled && r.blendEnabled) return true;
|
||||
if (l.blendControl < r.blendControl) return true;
|
||||
if (l.colorWriteMask < r.colorWriteMask) return true;
|
||||
if (l.fragHash < r.fragHash) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// This pipeline only caches the pipeline with all of its color and depth attachment variations
|
||||
class DrawPipelineCache {
|
||||
public:
|
||||
DrawPipelineCache() = default;
|
||||
|
||||
~DrawPipelineCache() {
|
||||
reset();
|
||||
vertexDescriptor->release();
|
||||
vertexFunction->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev, MTL::Library* lib, MTL::Function* vert, MTL::VertexDescriptor* vertDesc) {
|
||||
device = dev;
|
||||
library = lib;
|
||||
vertexFunction = vert;
|
||||
vertexDescriptor = vertDesc;
|
||||
}
|
||||
|
||||
MTL::RenderPipelineState* get(DrawPipelineHash hash) {
|
||||
auto& pipeline = pipelineCache[hash];
|
||||
|
||||
if (!pipeline) {
|
||||
auto& fragmentFunction = fragmentFunctionCache[hash.fragHash];
|
||||
if (!fragmentFunction) {
|
||||
MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init();
|
||||
constants->setConstantValue(&hash.fragHash.lightingEnabled, MTL::DataTypeBool, NS::UInteger(0));
|
||||
constants->setConstantValue(&hash.fragHash.lightingNumLights, MTL::DataTypeUChar, NS::UInteger(1));
|
||||
constants->setConstantValue(&hash.fragHash.lightingConfig1, MTL::DataTypeUInt, NS::UInteger(2));
|
||||
constants->setConstantValue(&hash.fragHash.alphaControl, MTL::DataTypeUShort, NS::UInteger(3));
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
fragmentFunction = library->newFunction(NS::String::string("fragmentDraw", NS::ASCIIStringEncoding), constants, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating draw fragment function: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
constants->release();
|
||||
}
|
||||
|
||||
MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init();
|
||||
desc->setVertexFunction(vertexFunction);
|
||||
desc->setFragmentFunction(fragmentFunction);
|
||||
desc->setVertexDescriptor(vertexDescriptor);
|
||||
|
||||
auto colorAttachment = desc->colorAttachments()->object(0);
|
||||
colorAttachment->setPixelFormat(toMTLPixelFormatColor(hash.colorFmt));
|
||||
MTL::ColorWriteMask writeMask = 0;
|
||||
if (hash.colorWriteMask & 0x1) writeMask |= MTL::ColorWriteMaskRed;
|
||||
if (hash.colorWriteMask & 0x2) writeMask |= MTL::ColorWriteMaskGreen;
|
||||
if (hash.colorWriteMask & 0x4) writeMask |= MTL::ColorWriteMaskBlue;
|
||||
if (hash.colorWriteMask & 0x8) writeMask |= MTL::ColorWriteMaskAlpha;
|
||||
colorAttachment->setWriteMask(writeMask);
|
||||
if (hash.blendEnabled) {
|
||||
const u8 rgbEquation = hash.blendControl & 0x7;
|
||||
const u8 alphaEquation = Helpers::getBits<8, 3>(hash.blendControl);
|
||||
|
||||
// Get blending functions
|
||||
const u8 rgbSourceFunc = Helpers::getBits<16, 4>(hash.blendControl);
|
||||
const u8 rgbDestFunc = Helpers::getBits<20, 4>(hash.blendControl);
|
||||
const u8 alphaSourceFunc = Helpers::getBits<24, 4>(hash.blendControl);
|
||||
const u8 alphaDestFunc = Helpers::getBits<28, 4>(hash.blendControl);
|
||||
|
||||
colorAttachment->setBlendingEnabled(true);
|
||||
colorAttachment->setRgbBlendOperation(toMTLBlendOperation(rgbEquation));
|
||||
colorAttachment->setAlphaBlendOperation(toMTLBlendOperation(alphaEquation));
|
||||
colorAttachment->setSourceRGBBlendFactor(toMTLBlendFactor(rgbSourceFunc));
|
||||
colorAttachment->setDestinationRGBBlendFactor(toMTLBlendFactor(rgbDestFunc));
|
||||
colorAttachment->setSourceAlphaBlendFactor(toMTLBlendFactor(alphaSourceFunc));
|
||||
colorAttachment->setDestinationAlphaBlendFactor(toMTLBlendFactor(alphaDestFunc));
|
||||
}
|
||||
|
||||
MTL::PixelFormat depthFormat = toMTLPixelFormatDepth(hash.depthFmt);
|
||||
desc->setDepthAttachmentPixelFormat(depthFormat);
|
||||
if (hash.depthFmt == DepthFmt::Depth24Stencil8) desc->setStencilAttachmentPixelFormat(depthFormat);
|
||||
|
||||
NS::Error* error = nullptr;
|
||||
desc->setLabel(toNSString("Draw pipeline"));
|
||||
pipeline = device->newRenderPipelineState(desc, &error);
|
||||
if (error) {
|
||||
Helpers::panic("Error creating draw pipeline state: %s", error->description()->cString(NS::ASCIIStringEncoding));
|
||||
}
|
||||
|
||||
desc->release();
|
||||
}
|
||||
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
for (auto& pair : pipelineCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
pipelineCache.clear();
|
||||
|
||||
for (auto& pair : fragmentFunctionCache) {
|
||||
pair.second->release();
|
||||
}
|
||||
fragmentFunctionCache.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<DrawPipelineHash, MTL::RenderPipelineState*> pipelineCache;
|
||||
std::map<DrawFragmentFunctionHash, MTL::Function*> fragmentFunctionCache;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::Library* library;
|
||||
MTL::Function* vertexFunction;
|
||||
MTL::VertexDescriptor* vertexDescriptor;
|
||||
};
|
||||
|
||||
} // namespace Metal
|
20
include/renderer_mtl/mtl_lut_texture.hpp
Normal file
|
@ -0,0 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
namespace Metal {
|
||||
|
||||
class LutTexture {
|
||||
public:
|
||||
LutTexture(MTL::Device* device, MTL::TextureType type, MTL::PixelFormat pixelFormat, u32 width, u32 height, const char* name);
|
||||
~LutTexture();
|
||||
u32 getNextIndex();
|
||||
|
||||
MTL::Texture* getTexture() { return texture; }
|
||||
u32 getCurrentIndex() { return currentIndex; }
|
||||
private:
|
||||
MTL::Texture* texture;
|
||||
u32 currentIndex = 0;
|
||||
};
|
||||
|
||||
} // namespace Metal
|
91
include/renderer_mtl/mtl_render_target.hpp
Normal file
|
@ -0,0 +1,91 @@
|
|||
#pragma once
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "objc_helper.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
namespace Metal {
|
||||
template <typename Format_t>
|
||||
struct RenderTarget {
|
||||
MTL::Device* device;
|
||||
|
||||
u32 location;
|
||||
Format_t format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
|
||||
MTL::Texture* texture = nullptr;
|
||||
|
||||
RenderTarget() : valid(false) {}
|
||||
|
||||
RenderTarget(MTL::Device* dev, u32 loc, Format_t format, u32 x, u32 y, bool valid = true)
|
||||
: device(dev), location(loc), format(format), size({x, y}), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
|
||||
const u32 startOffset = (inputAddress - location) / sizePerPixel(format);
|
||||
const u32 x0 = (startOffset % (size.x() * 8)) / 8;
|
||||
const u32 y0 = (startOffset / (size.x() * 8)) * 8;
|
||||
return Math::Rect<u32>{x0, size.y() - y0, x0 + width, size.y() - height - y0};
|
||||
}
|
||||
|
||||
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
|
||||
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
|
||||
bool matches(RenderTarget& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
void allocate() {
|
||||
MTL::PixelFormat pixelFormat = MTL::PixelFormatInvalid;
|
||||
if (std::is_same<Format_t, PICA::ColorFmt>::value) {
|
||||
pixelFormat = PICA::toMTLPixelFormatColor((PICA::ColorFmt)format);
|
||||
} else if (std::is_same<Format_t, PICA::DepthFmt>::value) {
|
||||
pixelFormat = PICA::toMTLPixelFormatDepth((PICA::DepthFmt)format);
|
||||
} else {
|
||||
panic("Invalid format type");
|
||||
}
|
||||
|
||||
MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
|
||||
descriptor->setTextureType(MTL::TextureType2D);
|
||||
descriptor->setPixelFormat(pixelFormat);
|
||||
descriptor->setWidth(size.u());
|
||||
descriptor->setHeight(size.v());
|
||||
descriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
|
||||
descriptor->setStorageMode(MTL::StorageModePrivate);
|
||||
texture = device->newTexture(descriptor);
|
||||
texture->setLabel(toNSString(
|
||||
std::string(std::is_same<Format_t, PICA::ColorFmt>::value ? "Color" : "Depth") + " render target " + std::to_string(size.u()) + "x" +
|
||||
std::to_string(size.v())
|
||||
));
|
||||
descriptor->release();
|
||||
}
|
||||
|
||||
void free() {
|
||||
valid = false;
|
||||
|
||||
if (texture) {
|
||||
texture->release();
|
||||
}
|
||||
}
|
||||
|
||||
u64 sizeInBytes() { return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format); }
|
||||
};
|
||||
|
||||
using ColorRenderTarget = RenderTarget<PICA::ColorFmt>;
|
||||
using DepthStencilRenderTarget = RenderTarget<PICA::DepthFmt>;
|
||||
} // namespace Metal
|
73
include/renderer_mtl/mtl_texture.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <array>
|
||||
#include <string>
|
||||
|
||||
#include "PICA/regs.hpp"
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "opengl.hpp"
|
||||
#include "renderer_mtl/pica_to_mtl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
namespace Metal {
|
||||
struct Texture {
|
||||
MTL::Device* device;
|
||||
|
||||
u32 location;
|
||||
u32 config; // Magnification/minification filter, wrapping configs, etc
|
||||
PICA::TextureFmt format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
|
||||
PICA::PixelFormatInfo formatInfo;
|
||||
MTL::Texture* texture = nullptr;
|
||||
MTL::SamplerState* sampler = nullptr;
|
||||
|
||||
Texture() : valid(false) {}
|
||||
|
||||
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
|
||||
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
|
||||
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
|
||||
bool matches(Texture& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
void allocate();
|
||||
void setNewConfig(u32 newConfig);
|
||||
void decodeTexture(std::span<const u8> data);
|
||||
void free();
|
||||
u64 sizeInBytes();
|
||||
|
||||
u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||
|
||||
// Get the morton interleave offset of a texel based on its U and V values
|
||||
static u32 mortonInterleave(u32 u, u32 v);
|
||||
// Get the byte offset of texel (u, v) in the texture
|
||||
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
|
||||
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||
|
||||
// Returns the format of this texture as a string
|
||||
std::string_view formatToString() { return PICA::textureFormatToString(format); }
|
||||
|
||||
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
|
||||
// TODO: Make hasAlpha a template parameter
|
||||
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
|
||||
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||
};
|
||||
} // namespace Metal
|
83
include/renderer_mtl/mtl_vertex_buffer_cache.hpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "pica_to_mtl.hpp"
|
||||
|
||||
|
||||
using namespace PICA;
|
||||
|
||||
namespace Metal {
|
||||
struct BufferHandle {
|
||||
MTL::Buffer* buffer;
|
||||
usize offset;
|
||||
};
|
||||
|
||||
class VertexBufferCache {
|
||||
// 128MB buffer for caching vertex data
|
||||
static constexpr usize CACHE_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
public:
|
||||
VertexBufferCache() = default;
|
||||
|
||||
~VertexBufferCache() {
|
||||
endFrame();
|
||||
buffer->release();
|
||||
}
|
||||
|
||||
void set(MTL::Device* dev) {
|
||||
device = dev;
|
||||
create();
|
||||
}
|
||||
|
||||
void endFrame() {
|
||||
ptr = 0;
|
||||
for (auto buffer : additionalAllocations) {
|
||||
buffer->release();
|
||||
}
|
||||
additionalAllocations.clear();
|
||||
}
|
||||
|
||||
BufferHandle get(const void* data, usize size) {
|
||||
// If the vertex buffer is too large, just create a new one
|
||||
if (ptr + size > CACHE_BUFFER_SIZE) {
|
||||
MTL::Buffer* newBuffer = device->newBuffer(data, size, MTL::ResourceStorageModeShared);
|
||||
newBuffer->setLabel(toNSString("Additional vertex buffer"));
|
||||
additionalAllocations.push_back(newBuffer);
|
||||
Helpers::warn("Vertex buffer doesn't have enough space, creating a new buffer");
|
||||
|
||||
return BufferHandle{newBuffer, 0};
|
||||
}
|
||||
|
||||
// Copy the data into the buffer
|
||||
std::memcpy((char*)buffer->contents() + ptr, data, size);
|
||||
|
||||
auto oldPtr = ptr;
|
||||
ptr += size;
|
||||
|
||||
return BufferHandle{buffer, oldPtr};
|
||||
}
|
||||
|
||||
void reset() {
|
||||
endFrame();
|
||||
|
||||
if (buffer) {
|
||||
buffer->release();
|
||||
create();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
MTL::Buffer* buffer = nullptr;
|
||||
usize ptr = 0;
|
||||
std::vector<MTL::Buffer*> additionalAllocations;
|
||||
|
||||
MTL::Device* device;
|
||||
|
||||
void create() {
|
||||
buffer = device->newBuffer(CACHE_BUFFER_SIZE, MTL::ResourceStorageModeShared);
|
||||
buffer->setLabel(toNSString("Shared vertex buffer"));
|
||||
}
|
||||
};
|
||||
} // namespace Metal
|
12
include/renderer_mtl/objc_helper.hpp
Normal file
|
@ -0,0 +1,12 @@
|
|||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "mtl_common.hpp"
|
||||
|
||||
namespace Metal {
|
||||
dispatch_data_t createDispatchData(const void* data, size_t size);
|
||||
} // namespace Metal
|
||||
|
||||
// Cast from std::string to NS::String*
|
||||
inline NS::String* toNSString(const std::string& str) { return NS::String::string(str.c_str(), NS::ASCIIStringEncoding); }
|
152
include/renderer_mtl/pica_to_mtl.hpp
Normal file
|
@ -0,0 +1,152 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct PixelFormatInfo {
|
||||
MTL::PixelFormat pixelFormat;
|
||||
size_t bytesPerTexel;
|
||||
};
|
||||
|
||||
constexpr PixelFormatInfo pixelFormatInfos[14] = {
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // RGB8
|
||||
{MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551
|
||||
{MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // RGBA4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // IA8
|
||||
{MTL::PixelFormatRG8Unorm, 2}, // RG8
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // I8
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A8
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // IA4
|
||||
{MTL::PixelFormatABGR4Unorm, 2}, // I4
|
||||
{MTL::PixelFormatA8Unorm, 1}, // A4
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1
|
||||
{MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4
|
||||
};
|
||||
|
||||
inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast<int>(format)]; }
|
||||
|
||||
inline MTL::PixelFormat toMTLPixelFormatColor(ColorFmt format) {
|
||||
switch (format) {
|
||||
case ColorFmt::RGBA8: return MTL::PixelFormatRGBA8Unorm;
|
||||
case ColorFmt::RGB8: return MTL::PixelFormatRGBA8Unorm;
|
||||
case ColorFmt::RGBA5551: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatBGR5A1Unorm?
|
||||
case ColorFmt::RGB565: return MTL::PixelFormatRGBA8Unorm; // TODO: use MTL::PixelFormatB5G6R5Unorm?
|
||||
case ColorFmt::RGBA4: return MTL::PixelFormatABGR4Unorm;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::PixelFormat toMTLPixelFormatDepth(DepthFmt format) {
|
||||
switch (format) {
|
||||
case DepthFmt::Depth16: return MTL::PixelFormatDepth16Unorm;
|
||||
case DepthFmt::Unknown1: return MTL::PixelFormatInvalid;
|
||||
case DepthFmt::Depth24:
|
||||
return MTL::PixelFormatDepth32Float; // Metal does not support 24-bit depth formats
|
||||
// Apple sillicon doesn't support 24-bit depth buffers, so we use 32-bit instead
|
||||
case DepthFmt::Depth24Stencil8: return MTL::PixelFormatDepth32Float_Stencil8;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::CompareFunction toMTLCompareFunc(u8 func) {
|
||||
switch (func) {
|
||||
case 0: return MTL::CompareFunctionNever;
|
||||
case 1: return MTL::CompareFunctionAlways;
|
||||
case 2: return MTL::CompareFunctionEqual;
|
||||
case 3: return MTL::CompareFunctionNotEqual;
|
||||
case 4: return MTL::CompareFunctionLess;
|
||||
case 5: return MTL::CompareFunctionLessEqual;
|
||||
case 6: return MTL::CompareFunctionGreater;
|
||||
case 7: return MTL::CompareFunctionGreaterEqual;
|
||||
default: Helpers::panic("Unknown compare function %u", func);
|
||||
}
|
||||
|
||||
return MTL::CompareFunctionAlways;
|
||||
}
|
||||
|
||||
inline MTL::BlendOperation toMTLBlendOperation(u8 op) {
|
||||
switch (op) {
|
||||
case 0: return MTL::BlendOperationAdd;
|
||||
case 1: return MTL::BlendOperationSubtract;
|
||||
case 2: return MTL::BlendOperationReverseSubtract;
|
||||
case 3: return MTL::BlendOperationMin;
|
||||
case 4: return MTL::BlendOperationMax;
|
||||
case 5: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
case 6: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
case 7: return MTL::BlendOperationAdd; // Unused (same as 0)
|
||||
default: Helpers::panic("Unknown blend operation %u", op);
|
||||
}
|
||||
|
||||
return MTL::BlendOperationAdd;
|
||||
}
|
||||
|
||||
inline MTL::BlendFactor toMTLBlendFactor(u8 factor) {
|
||||
switch (factor) {
|
||||
case 0: return MTL::BlendFactorZero;
|
||||
case 1: return MTL::BlendFactorOne;
|
||||
case 2: return MTL::BlendFactorSourceColor;
|
||||
case 3: return MTL::BlendFactorOneMinusSourceColor;
|
||||
case 4: return MTL::BlendFactorDestinationColor;
|
||||
case 5: return MTL::BlendFactorOneMinusDestinationColor;
|
||||
case 6: return MTL::BlendFactorSourceAlpha;
|
||||
case 7: return MTL::BlendFactorOneMinusSourceAlpha;
|
||||
case 8: return MTL::BlendFactorDestinationAlpha;
|
||||
case 9: return MTL::BlendFactorOneMinusDestinationAlpha;
|
||||
case 10: return MTL::BlendFactorBlendColor;
|
||||
case 11: return MTL::BlendFactorOneMinusBlendColor;
|
||||
case 12: return MTL::BlendFactorBlendAlpha;
|
||||
case 13: return MTL::BlendFactorOneMinusBlendAlpha;
|
||||
case 14: return MTL::BlendFactorSourceAlphaSaturated;
|
||||
case 15: return MTL::BlendFactorOne; // Undocumented
|
||||
default: Helpers::panic("Unknown blend factor %u", factor);
|
||||
}
|
||||
|
||||
return MTL::BlendFactorOne;
|
||||
}
|
||||
|
||||
inline MTL::StencilOperation toMTLStencilOperation(u8 op) {
|
||||
switch (op) {
|
||||
case 0: return MTL::StencilOperationKeep;
|
||||
case 1: return MTL::StencilOperationZero;
|
||||
case 2: return MTL::StencilOperationReplace;
|
||||
case 3: return MTL::StencilOperationIncrementClamp;
|
||||
case 4: return MTL::StencilOperationDecrementClamp;
|
||||
case 5: return MTL::StencilOperationInvert;
|
||||
case 6: return MTL::StencilOperationIncrementWrap;
|
||||
case 7: return MTL::StencilOperationDecrementWrap;
|
||||
default: Helpers::panic("Unknown stencil operation %u", op);
|
||||
}
|
||||
|
||||
return MTL::StencilOperationKeep;
|
||||
}
|
||||
|
||||
inline MTL::PrimitiveType toMTLPrimitiveType(PrimType primType) {
|
||||
switch (primType) {
|
||||
case PrimType::TriangleList: return MTL::PrimitiveTypeTriangle;
|
||||
case PrimType::TriangleStrip: return MTL::PrimitiveTypeTriangleStrip;
|
||||
case PrimType::TriangleFan:
|
||||
Helpers::warn("Triangle fans are not supported on Metal, using triangles instead");
|
||||
return MTL::PrimitiveTypeTriangle;
|
||||
case PrimType::GeometryPrimitive:
|
||||
return MTL::PrimitiveTypeTriangle;
|
||||
}
|
||||
}
|
||||
|
||||
inline MTL::SamplerAddressMode toMTLSamplerAddressMode(u8 addrMode) {
|
||||
switch (addrMode) {
|
||||
case 0: return MTL::SamplerAddressModeClampToEdge;
|
||||
case 1: return MTL::SamplerAddressModeClampToBorderColor;
|
||||
case 2: return MTL::SamplerAddressModeRepeat;
|
||||
case 3: return MTL::SamplerAddressModeMirrorRepeat;
|
||||
case 4: return MTL::SamplerAddressModeClampToEdge;
|
||||
case 5: return MTL::SamplerAddressModeClampToBorderColor;
|
||||
case 6: return MTL::SamplerAddressModeRepeat;
|
||||
case 7: return MTL::SamplerAddressModeRepeat;
|
||||
default: Helpers::panic("Unknown sampler address mode %u", addrMode);
|
||||
}
|
||||
|
||||
return MTL::SamplerAddressModeClampToEdge;
|
||||
}
|
||||
} // namespace PICA
|
207
include/renderer_mtl/renderer_mtl.hpp
Normal file
|
@ -0,0 +1,207 @@
|
|||
#pragma once
|
||||
|
||||
#include <Metal/Metal.hpp>
|
||||
#include <QuartzCore/QuartzCore.hpp>
|
||||
|
||||
#include "mtl_blit_pipeline_cache.hpp"
|
||||
#include "mtl_command_encoder.hpp"
|
||||
#include "mtl_depth_stencil_cache.hpp"
|
||||
#include "mtl_draw_pipeline_cache.hpp"
|
||||
#include "mtl_lut_texture.hpp"
|
||||
#include "mtl_render_target.hpp"
|
||||
#include "mtl_texture.hpp"
|
||||
#include "mtl_vertex_buffer_cache.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
|
||||
// HACK: use the OpenGL cache
|
||||
#include "../renderer_gl/surface_cache.hpp"
|
||||
|
||||
class GPU;
|
||||
|
||||
struct Color4 {
|
||||
float r, g, b, a;
|
||||
};
|
||||
|
||||
class RendererMTL final : public Renderer {
|
||||
public:
|
||||
RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
~RendererMTL() override;
|
||||
|
||||
void reset() override;
|
||||
void display() override;
|
||||
void initGraphicsContext(SDL_Window* window) override;
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
|
||||
void screenshot(const std::string& name) override;
|
||||
void deinitGraphicsContext() override;
|
||||
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override {}
|
||||
#endif
|
||||
|
||||
private:
|
||||
CA::MetalLayer* metalLayer;
|
||||
|
||||
MTL::Device* device;
|
||||
MTL::CommandQueue* commandQueue;
|
||||
|
||||
Metal::CommandEncoder commandEncoder;
|
||||
|
||||
// Libraries
|
||||
MTL::Library* library;
|
||||
|
||||
// Caches
|
||||
SurfaceCache<Metal::ColorRenderTarget, 16, true> colorRenderTargetCache;
|
||||
SurfaceCache<Metal::DepthStencilRenderTarget, 16, true> depthStencilRenderTargetCache;
|
||||
SurfaceCache<Metal::Texture, 256, true> textureCache;
|
||||
Metal::BlitPipelineCache blitPipelineCache;
|
||||
Metal::DrawPipelineCache drawPipelineCache;
|
||||
Metal::DepthStencilCache depthStencilCache;
|
||||
Metal::VertexBufferCache vertexBufferCache;
|
||||
|
||||
// Resources
|
||||
MTL::SamplerState* nearestSampler;
|
||||
MTL::SamplerState* linearSampler;
|
||||
MTL::Texture* nullTexture;
|
||||
MTL::DepthStencilState* defaultDepthStencilState;
|
||||
|
||||
Metal::LutTexture* lutLightingTexture;
|
||||
Metal::LutTexture* lutFogTexture;
|
||||
|
||||
// Pipelines
|
||||
MTL::RenderPipelineState* displayPipeline;
|
||||
// MTL::RenderPipelineState* copyToLutTexturePipeline;
|
||||
|
||||
// Clears
|
||||
std::map<MTL::Texture*, Color4> colorClearOps;
|
||||
std::map<MTL::Texture*, float> depthClearOps;
|
||||
std::map<MTL::Texture*, u8> stencilClearOps;
|
||||
|
||||
// Active state
|
||||
MTL::CommandBuffer* commandBuffer = nullptr;
|
||||
MTL::RenderCommandEncoder* renderCommandEncoder = nullptr;
|
||||
MTL::Texture* lastColorTexture = nullptr;
|
||||
MTL::Texture* lastDepthTexture = nullptr;
|
||||
|
||||
// Debug
|
||||
std::string nextRenderPassName;
|
||||
|
||||
void createCommandBufferIfNeeded() {
|
||||
if (!commandBuffer) {
|
||||
commandBuffer = commandQueue->commandBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
void endRenderPass() {
|
||||
if (renderCommandEncoder) {
|
||||
renderCommandEncoder->endEncoding();
|
||||
renderCommandEncoder = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void beginRenderPassIfNeeded(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, bool doesClears, MTL::Texture* colorTexture, MTL::Texture* depthTexture = nullptr
|
||||
);
|
||||
|
||||
void commitCommandBuffer() {
|
||||
if (renderCommandEncoder) {
|
||||
renderCommandEncoder->endEncoding();
|
||||
renderCommandEncoder->release();
|
||||
renderCommandEncoder = nullptr;
|
||||
}
|
||||
if (commandBuffer) {
|
||||
commandBuffer->commit();
|
||||
// HACK
|
||||
commandBuffer->waitUntilCompleted();
|
||||
commandBuffer->release();
|
||||
commandBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
|
||||
inline void clearAttachment(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, ClearDataT clearData, GetAttachmentT getAttachment,
|
||||
SetClearDataT setClearData
|
||||
) {
|
||||
bool beginRenderPass = (renderPassDescriptor == nullptr);
|
||||
if (!renderPassDescriptor) {
|
||||
renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init();
|
||||
}
|
||||
|
||||
AttachmentT* attachment = getAttachment(renderPassDescriptor);
|
||||
attachment->setTexture(texture);
|
||||
setClearData(attachment, clearData);
|
||||
attachment->setLoadAction(MTL::LoadActionClear);
|
||||
attachment->setStoreAction(MTL::StoreActionStore);
|
||||
|
||||
if (beginRenderPass) {
|
||||
if (std::is_same<AttachmentT, MTL::RenderPassColorAttachmentDescriptor>::value)
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, true, texture);
|
||||
else
|
||||
beginRenderPassIfNeeded(renderPassDescriptor, true, nullptr, texture);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename AttachmentT, typename ClearDataT, typename GetAttachmentT, typename SetClearDataT>
|
||||
inline bool clearAttachment(
|
||||
MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture, std::map<MTL::Texture*, ClearDataT>& clearOps,
|
||||
GetAttachmentT getAttachment, SetClearDataT setClearData
|
||||
) {
|
||||
auto it = clearOps.find(texture);
|
||||
if (it != clearOps.end()) {
|
||||
clearAttachment<AttachmentT>(renderPassDescriptor, texture, it->second, getAttachment, setClearData);
|
||||
clearOps.erase(it);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (renderPassDescriptor) {
|
||||
AttachmentT* attachment = getAttachment(renderPassDescriptor);
|
||||
attachment->setTexture(texture);
|
||||
attachment->setLoadAction(MTL::LoadActionLoad);
|
||||
attachment->setStoreAction(MTL::StoreActionStore);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool clearColor(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassColorAttachmentDescriptor, Color4>(
|
||||
renderPassDescriptor, texture, colorClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->colorAttachments()->object(0); },
|
||||
[](auto attachment, auto& color) { attachment->setClearColor(MTL::ClearColor(color.r, color.g, color.b, color.a)); }
|
||||
);
|
||||
}
|
||||
|
||||
bool clearDepth(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassDepthAttachmentDescriptor, float>(
|
||||
renderPassDescriptor, texture, depthClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->depthAttachment(); },
|
||||
[](auto attachment, auto& depth) { attachment->setClearDepth(depth); }
|
||||
);
|
||||
}
|
||||
|
||||
bool clearStencil(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* texture) {
|
||||
return clearAttachment<MTL::RenderPassStencilAttachmentDescriptor, u8>(
|
||||
renderPassDescriptor, texture, stencilClearOps,
|
||||
[](MTL::RenderPassDescriptor* renderPassDescriptor) { return renderPassDescriptor->stencilAttachment(); },
|
||||
[](auto attachment, auto& stencil) { attachment->setClearStencil(stencil); }
|
||||
);
|
||||
}
|
||||
|
||||
std::optional<Metal::ColorRenderTarget> getColorRenderTarget(
|
||||
u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true
|
||||
);
|
||||
Metal::DepthStencilRenderTarget& getDepthRenderTarget();
|
||||
Metal::Texture& getTexture(Metal::Texture& tex);
|
||||
void setupTextureEnvState(MTL::RenderCommandEncoder* encoder);
|
||||
void bindTexturesToSlots();
|
||||
void updateLightingLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void updateFogLUT(MTL::RenderCommandEncoder* encoder);
|
||||
void textureCopyImpl(
|
||||
Metal::ColorRenderTarget& srcFramebuffer, Metal::ColorRenderTarget& destFramebuffer, const Math::Rect<u32>& srcRect,
|
||||
const Math::Rect<u32>& destRect
|
||||
);
|
||||
};
|
|
@ -11,7 +11,8 @@ struct Scheduler {
|
|||
VBlank = 0, // End of frame event
|
||||
UpdateTimers = 1, // Update kernel timer objects
|
||||
RunDSP = 2, // Make the emulated DSP run for one audio frame
|
||||
Panic = 3, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
|
||||
SignalY2R = 3, // Signal that a Y2R conversion has finished
|
||||
Panic = 4, // Dummy event that is always pending and should never be triggered (Timestamp = UINT64_MAX)
|
||||
TotalNumberOfEvents // How many event types do we have in total?
|
||||
};
|
||||
static constexpr usize totalNumberOfEvents = static_cast<usize>(EventType::TotalNumberOfEvents);
|
||||
|
|
38
include/sdl_sensors.hpp
Normal file
|
@ -0,0 +1,38 @@
|
|||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <glm/glm.hpp>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "services/hid.hpp"
|
||||
|
||||
// Convert SDL sensor readings to 3DS format
|
||||
// We use the same code for Android as well, since the values we get from Android are in the same format as SDL (m/s^2 for acceleration, rad/s for
|
||||
// rotation)
|
||||
namespace Sensors::SDL {
|
||||
// Convert the rotation data we get from SDL sensor events to rotation data we can feed right to HID
|
||||
// Returns [pitch, roll, yaw]
|
||||
static glm::vec3 convertRotation(glm::vec3 rotation) {
|
||||
// Annoyingly, Android doesn't support the <numbers> header yet so we define pi ourselves
|
||||
static constexpr double pi = 3.141592653589793;
|
||||
// Convert the rotation from rad/s to deg/s and scale by the gyroscope coefficient in HID
|
||||
constexpr float scale = 180.f / pi * HIDService::gyroscopeCoeff;
|
||||
// The axes are also inverted, so invert scale before the multiplication.
|
||||
return rotation * -scale;
|
||||
}
|
||||
|
||||
static glm::vec3 convertAcceleration(float* data) {
|
||||
// Set our cap to ~9 m/s^2. The 3DS sensors cap at -930 and +930, so values above this value will get clamped to 930
|
||||
// At rest (3DS laid flat on table), hardware reads around ~0 for x and z axis, and around ~480 for y axis due to gravity.
|
||||
// This code tries to mimic this approximately, with offsets based on measurements from my DualShock 4.
|
||||
static constexpr float accelMax = 9.f;
|
||||
// We define standard gravity(g) ourself instead of using the SDL one in order for the code to work on Android too.
|
||||
static constexpr float standardGravity = 9.80665f;
|
||||
|
||||
s16 x = std::clamp<s16>(s16(data[0] / accelMax * 930.f), -930, +930);
|
||||
s16 y = std::clamp<s16>(s16(data[1] / (standardGravity * accelMax) * 930.f - 350.f), -930, +930);
|
||||
s16 z = std::clamp<s16>(s16((data[2] - 2.1f) / accelMax * 930.f), -930, +930);
|
||||
|
||||
return glm::vec3(x, y, z);
|
||||
}
|
||||
} // namespace Sensors::SDL
|
|
@ -8,6 +8,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class ACService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::AC;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, acLogger)
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class ACTService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::ACT;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, actLogger)
|
||||
|
@ -15,7 +17,7 @@ class ACTService {
|
|||
void generateUUID(u32 messagePointer);
|
||||
void getAccountDataBlock(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
ACTService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class AMService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::AM;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, amLogger)
|
||||
|
@ -15,7 +17,7 @@ class AMService {
|
|||
void getPatchTitleInfo(u32 messagePointer);
|
||||
void listTitleInfo(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
AMService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -12,7 +12,8 @@
|
|||
class Kernel;
|
||||
|
||||
enum class ConsoleModel : u32 {
|
||||
Old3DS, New3DS
|
||||
Old3DS,
|
||||
New3DS,
|
||||
};
|
||||
|
||||
// https://www.3dbrew.org/wiki/NS_and_APT_Services#Command
|
||||
|
@ -41,6 +42,8 @@ namespace APT::Transitions {
|
|||
}
|
||||
|
||||
class APTService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::APT;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -99,7 +102,7 @@ class APTService {
|
|||
|
||||
u32 screencapPostPermission;
|
||||
|
||||
public:
|
||||
public:
|
||||
APTService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel), appletManager(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class BOSSService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::BOSS;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, bossLogger)
|
||||
|
@ -17,7 +19,7 @@ class BOSSService {
|
|||
void getNewArrivalFlag(u32 messagePointer);
|
||||
void getNsDataIdList(u32 messagePointer, u32 commandWord);
|
||||
void getOptoutFlag(u32 messagePointer);
|
||||
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
|
||||
void getStorageEntryInfo(u32 messagePointer); // Unknown what this is, name taken from Citra
|
||||
void getTaskIdList(u32 messagePointer);
|
||||
void getTaskInfo(u32 messagePointer);
|
||||
void getTaskServiceStatus(u32 messagePointer);
|
||||
|
@ -35,7 +37,8 @@ class BOSSService {
|
|||
void unregisterTask(u32 messagePointer);
|
||||
|
||||
s8 optoutFlag;
|
||||
public:
|
||||
|
||||
public:
|
||||
BOSSService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
class Kernel;
|
||||
|
||||
class CAMService {
|
||||
using Handle = HorizonHandle;
|
||||
using Event = std::optional<Handle>;
|
||||
|
||||
struct Port {
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <optional>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "kernel_types.hpp"
|
||||
#include "logger.hpp"
|
||||
|
@ -9,6 +10,8 @@
|
|||
class Kernel;
|
||||
|
||||
class CECDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::CECD;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -20,7 +23,7 @@ class CECDService {
|
|||
void getInfoEventHandle(u32 messagePointer);
|
||||
void openAndRead(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
CECDService(Memory& mem, Kernel& kernel) : mem(mem), kernel(kernel) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#pragma once
|
||||
#include <cstring>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "memory.hpp"
|
||||
|
@ -7,8 +8,10 @@
|
|||
#include "result/result.hpp"
|
||||
|
||||
class CFGService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Memory& mem;
|
||||
CountryCodes country = CountryCodes::US; // Default to USA
|
||||
CountryCodes country = CountryCodes::US; // Default to USA
|
||||
MAKE_LOG_FUNCTION(log, cfgLogger)
|
||||
|
||||
void writeStringU16(u32 pointer, const std::u16string& string);
|
||||
|
@ -27,12 +30,12 @@ class CFGService {
|
|||
|
||||
void getConfigInfo(u32 output, u32 blockID, u32 size, u32 permissionMask);
|
||||
|
||||
public:
|
||||
public:
|
||||
enum class Type {
|
||||
U, // cfg:u
|
||||
I, // cfg:i
|
||||
S, // cfg:s
|
||||
NOR, // cfg:nor
|
||||
U, // cfg:u
|
||||
I, // cfg:i
|
||||
S, // cfg:s
|
||||
NOR, // cfg:nor
|
||||
};
|
||||
|
||||
CFGService(Memory& mem) : mem(mem) {}
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
class Kernel;
|
||||
|
||||
class CSNDService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::CSND;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
@ -30,7 +32,5 @@ class CSNDService {
|
|||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
||||
void setSharedMemory(u8* ptr) {
|
||||
sharedMemory = ptr;
|
||||
}
|
||||
void setSharedMemory(u8* ptr) { sharedMemory = ptr; }
|
||||
};
|
|
@ -8,6 +8,8 @@
|
|||
// Please forgive me for how everything in this file is named
|
||||
// "dlp:SRVR" is not a nice name to work with
|
||||
class DlpSrvrService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::DLP_SRVR;
|
||||
Memory& mem;
|
||||
MAKE_LOG_FUNCTION(log, dlpSrvrLogger)
|
||||
|
@ -15,7 +17,7 @@ class DlpSrvrService {
|
|||
// Service commands
|
||||
void isChild(u32 messagePointer);
|
||||
|
||||
public:
|
||||
public:
|
||||
DlpSrvrService(Memory& mem) : mem(mem) {}
|
||||
void reset();
|
||||
void handleSyncRequest(u32 messagePointer);
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
class Kernel;
|
||||
|
||||
class DSPService {
|
||||
using Handle = HorizonHandle;
|
||||
|
||||
Handle handle = KernelHandles::DSP;
|
||||
Memory& mem;
|
||||
Kernel& kernel;
|
||||
|
|
84
include/services/fonts.hpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
// Copyright 2016 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.h
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "swap.hpp"
|
||||
|
||||
namespace HLE::Fonts {
|
||||
struct CFNT {
|
||||
u8 magic[4];
|
||||
u16_le endianness;
|
||||
u16_le headerSize;
|
||||
u32_le version;
|
||||
u32_le fileSize;
|
||||
u32_le numBlocks;
|
||||
};
|
||||
|
||||
struct SectionHeader {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
};
|
||||
|
||||
struct FINF {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u8 fontType;
|
||||
u8 lineFeed;
|
||||
u16_le alterCharIndex;
|
||||
u8 default_width[3];
|
||||
u8 encoding;
|
||||
u32_le tglpOffset;
|
||||
u32_le cwdhOffset;
|
||||
u32_le cmapOffset;
|
||||
u8 height;
|
||||
u8 width;
|
||||
u8 ascent;
|
||||
u8 reserved;
|
||||
};
|
||||
|
||||
struct TGLP {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u8 cellWidth;
|
||||
u8 cellHeight;
|
||||
u8 baselinePosition;
|
||||
u8 maxCharacterWidth;
|
||||
u32_le sheetSize;
|
||||
u16_le numSheets;
|
||||
u16_le sheetImageFormat;
|
||||
u16_le numColumns;
|
||||
u16_le numRows;
|
||||
u16_le sheetWidth;
|
||||
u16_le sheetHeight;
|
||||
u32_le sheetDataOffset;
|
||||
};
|
||||
|
||||
struct CMAP {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u16_le codeBegin;
|
||||
u16_le codeEnd;
|
||||
u16_le mappingMethod;
|
||||
u16_le reserved;
|
||||
u32_le nextCmapOffset;
|
||||
};
|
||||
|
||||
struct CWDH {
|
||||
u8 magic[4];
|
||||
u32_le sectionSize;
|
||||
u16_le startIndex;
|
||||
u16_le endIndex;
|
||||
u32_le nextCwdhOffset;
|
||||
};
|
||||
|
||||
// Relocates the internal addresses of the BCFNT Shared Font to the new base. The current base will
|
||||
// be auto-detected based on the file headers.
|
||||
void relocateSharedFont(u8* sharedFont, u32 newAddress);
|
||||
} // namespace HLE::Fonts
|