From a9bb11e4b3b5c9617055f965dd38cd12820acffb Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Sat, 15 Jul 2023 19:04:53 -0700 Subject: [PATCH] Migrate OpenGL string-literals to embedded files Rather than declaring a big C++ literal for these strings, they are now proper `.vert` and `.frag` files that will become embedded into the application at build-time. It also allows for clang-format to format the glsl files the same as our C++ code. CMake will also automatically track and re-embed the file if it detects that the glsl source files have changed since the last build. Ex, making a change to `opengl_display.frag` and compiling will automatically recompile and link the resource-target that it is associated with. --- CMakeLists.txt | 14 +- src/core/renderer_gl/renderer_gl.cpp | 589 +------------------ src/host_shaders/opengl_display.frag | 6 + src/host_shaders/opengl_display.vert | 23 + src/host_shaders/opengl_fragment_shader.frag | 409 +++++++++++++ src/host_shaders/opengl_vertex_shader.vert | 97 +++ 6 files changed, 564 insertions(+), 574 deletions(-) create mode 100644 src/host_shaders/opengl_display.frag create mode 100644 src/host_shaders/opengl_display.vert create mode 100644 src/host_shaders/opengl_fragment_shader.frag create mode 100644 src/host_shaders/opengl_vertex_shader.vert diff --git a/CMakeLists.txt b/CMakeLists.txt index c023e008..fc45b591 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,8 @@ add_subdirectory(third_party/toml11) include_directories(${SDL2_INCLUDE_DIR}) include_directories(third_party/toml11) +add_subdirectory(third_party/cmrc) + set(BOOST_ROOT "${CMAKE_SOURCE_DIR}/third_party/boost") set(Boost_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/third_party/boost") set(Boost_NO_SYSTEM_PATHS ON) @@ -185,6 +187,16 @@ if(ENABLE_OPENGL) set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES}) source_group("Source Files\\Core\\OpenGL Renderer" FILES ${RENDERER_GL_SOURCE_FILES}) + + cmrc_add_resource_library( + resources_renderer_gl + NAMESPACE RendererGL + WHENCE "src/host_shaders/" + "src/host_shaders/opengl_display.frag" + "src/host_shaders/opengl_display.vert" + "src/host_shaders/opengl_vertex_shader.vert" + "src/host_shaders/opengl_fragment_shader.frag" + ) endif() source_group("Header Files\\Core" FILES ${HEADER_FILES}) @@ -206,7 +218,7 @@ target_link_libraries(Alber PRIVATE dynarmic SDL2-static cryptopp) if(ENABLE_OPENGL) target_compile_definitions(Alber PUBLIC "PANDA3DS_ENABLE_OPENGL=1") - target_link_libraries(Alber PRIVATE glad) + target_link_libraries(Alber PRIVATE glad resources_renderer_gl) endif() if(GPU_DEBUG_INFO) diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 2c41f83b..94639f51 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -2,583 +2,18 @@ #include +#include + #include "PICA/float_types.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" +CMRC_DECLARE(RendererGL); + using namespace Floats; using namespace Helpers; using namespace PICA; -const char* vertexShader = R"( - #version 410 core - - layout (location = 0) in vec4 a_coords; - layout (location = 1) in vec4 a_quaternion; - layout (location = 2) in vec4 a_vertexColour; - layout (location = 3) in vec2 a_texcoord0; - layout (location = 4) in vec2 a_texcoord1; - layout (location = 5) in float a_texcoord0_w; - layout (location = 6) in vec3 a_view; - layout (location = 7) in vec2 a_texcoord2; - - out vec3 v_normal; - out vec3 v_tangent; - out vec3 v_bitangent; - out vec4 v_colour; - out vec3 v_texcoord0; - out vec2 v_texcoord1; - out vec3 v_view; - out vec2 v_texcoord2; - flat out vec4 v_textureEnvColor[6]; - flat out vec4 v_textureEnvBufferColor; - - out float gl_ClipDistance[2]; - - // TEV uniforms - uniform uint u_textureEnvColor[6]; - uniform uint u_picaRegs[0x200 - 0x48]; - - // Helper so that the implementation of u_pica_regs can be changed later - uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x48]; - } - - vec4 abgr8888ToVec4(uint abgr) { - const float scale = 1.0 / 255.0; - - return scale * vec4( - float(abgr & 0xffu), - float((abgr >> 8) & 0xffu), - float((abgr >> 16) & 0xffu), - float(abgr >> 24) - ); - } - - vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){ - vec3 u = q.xyz; - float s = q.w; - return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v + 2.0 * s * cross(u, v); - } - - // Convert an arbitrary-width floating point literal to an f32 - float decodeFP(uint hex, uint E, uint M){ - uint width = M + E + 1u; - uint bias = 128u - (1u << (E - 1u)); - uint exponent = (hex >> M) & ((1u << E) - 1u); - uint mantissa = hex & ((1u << M) - 1u); - uint sign = (hex >> (E + M)) << 31u; - - if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { - if (exponent == (1u << E) - 1u) exponent = 255u; - else exponent += bias; - hex = sign | (mantissa << (23u - M)) | (exponent << 23u); - } else { - hex = sign; - } - - return uintBitsToFloat(hex); - } - - void main() { - gl_Position = a_coords; - v_colour = a_vertexColour; - - // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA - v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); - v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); - v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); - v_view = a_view; - - v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); - v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); - v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); - - for (int i = 0; i < 6; i++) { - v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); - } - - v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD)); - - // Parse clipping plane registers - // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 - // With n = (A, B, C) being the normal vector and D being the origin point distance - // Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1] - vec4 clipData = vec4( - decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16), - decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16), - decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16), - decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16) - ); - - // There's also another, always-on clipping plane based on vertex z - gl_ClipDistance[0] = -a_coords.z; - gl_ClipDistance[1] = dot(clipData, a_coords); - } -)"; - -const char* fragmentShader = R"( - #version 410 core - - in vec3 v_tangent; - in vec3 v_normal; - in vec3 v_bitangent; - in vec4 v_colour; - in vec3 v_texcoord0; - in vec2 v_texcoord1; - in vec3 v_view; - in vec2 v_texcoord2; - flat in vec4 v_textureEnvColor[6]; - flat in vec4 v_textureEnvBufferColor; - - out vec4 fragColour; - - // TEV uniforms - uniform uint u_textureEnvSource[6]; - uniform uint u_textureEnvOperand[6]; - uniform uint u_textureEnvCombiner[6]; - uniform uint u_textureEnvScale[6]; - - // Depth control uniforms - uniform float u_depthScale; - uniform float u_depthOffset; - uniform bool u_depthmapEnable; - - uniform sampler2D u_tex0; - uniform sampler2D u_tex1; - uniform sampler2D u_tex2; - uniform sampler1DArray u_tex_lighting_lut; - - uniform uint u_picaRegs[0x200 - 0x48]; - - // Helper so that the implementation of u_pica_regs can be changed later - uint readPicaReg(uint reg_addr){ - return u_picaRegs[reg_addr - 0x48]; - } - - vec4 tevSources[16]; - vec4 tevNextPreviousBuffer; - bool tevUnimplementedSourceFlag = false; - - // OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): - // https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml - - vec4 tevFetchSource(uint src_id) { - if (src_id >= 6u && src_id < 13u) { - tevUnimplementedSourceFlag = true; - } - - return tevSources[src_id]; - } - - vec4 tevGetColorAndAlphaSource(int tev_id, int src_id) { - vec4 result; - - vec4 colorSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4)) & 15u); - vec4 alphaSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); - - uint colorOperand = (u_textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; - uint alphaOperand = (u_textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; - - // TODO: figure out what the undocumented values do - switch (colorOperand) { - case 0u: result.rgb = colorSource.rgb; break; // Source color - case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color - case 2u: result.rgb = vec3(colorSource.a); break; // Source alpha - case 3u: result.rgb = vec3(1.0 - colorSource.a); break; // One minus source alpha - case 4u: result.rgb = vec3(colorSource.r); break; // Source red - case 5u: result.rgb = vec3(1.0 - colorSource.r); break; // One minus source red - case 8u: result.rgb = vec3(colorSource.g); break; // Source green - case 9u: result.rgb = vec3(1.0 - colorSource.g); break; // One minus source green - case 12u: result.rgb = vec3(colorSource.b); break; // Source blue - case 13u: result.rgb = vec3(1.0 - colorSource.b); break; // One minus source blue - default: break; - } - - // TODO: figure out what the undocumented values do - switch (alphaOperand) { - case 0u: result.a = alphaSource.a; break; // Source alpha - case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha - case 2u: result.a = alphaSource.r; break; // Source red - case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red - case 4u: result.a = alphaSource.g; break; // Source green - case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green - case 6u: result.a = alphaSource.b; break; // Source blue - case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue - default: break; - } - - return result; - } - - vec4 tevCalculateCombiner(int tev_id) { - vec4 source0 = tevGetColorAndAlphaSource(tev_id, 0); - vec4 source1 = tevGetColorAndAlphaSource(tev_id, 1); - vec4 source2 = tevGetColorAndAlphaSource(tev_id, 2); - - uint colorCombine = u_textureEnvCombiner[tev_id] & 15u; - uint alphaCombine = (u_textureEnvCombiner[tev_id] >> 16) & 15u; - - vec4 result = vec4(1.0); - - // TODO: figure out what the undocumented values do - switch (colorCombine) { - case 0u: result.rgb = source0.rgb; break; // Replace - case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate - case 2u: result.rgb = min(vec3(1.0), source0.rgb + source1.rgb); break; // Add - case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed - case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate - case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract - case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5 , source1.rgb - 0.5)); break; // Dot3 RGB - case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5 , source1.rgb - 0.5)); break; // Dot3 RGBA - case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add - case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply - default: break; - } - - if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. - // TODO: figure out what the undocumented values do - // TODO: test if the alpha combiner supports all the same modes as the color combiner. - switch (alphaCombine) { - case 0u: result.a = source0.a; break; // Replace - case 1u: result.a = source0.a * source1.a; break; // Modulate - case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add - case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed - case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate - case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract - case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add - case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply - default: break; - } - } - - result.rgb *= float(1 << (u_textureEnvScale[tev_id] & 3u)); - result.a *= float(1 << ((u_textureEnvScale[tev_id] >> 16) & 3u)); - - return result; - } - - #define D0_LUT 0u - #define D1_LUT 1u - #define SP_LUT 2u - #define FR_LUT 3u - #define RB_LUT 4u - #define RG_LUT 5u - #define RR_LUT 6u - - float lutLookup(uint lut, uint light, float value){ - if (lut >= FR_LUT && lut <= RR_LUT) - lut -= 1; - if (lut==SP_LUT) - lut = light + 8; - return texture(u_tex_lighting_lut, vec2(value, lut)).r; - } - - vec3 regToColor(uint reg) { - // Normalization scale to convert from [0...255] to [0.0...1.0] - const float scale = 1.0 / 255.0; - - return scale * vec3( - float(bitfieldExtract(reg, 20, 8)), - float(bitfieldExtract(reg, 10, 8)), - float(bitfieldExtract(reg, 00, 8)) - ); - } - - // Convert an arbitrary-width floating point literal to an f32 - float decodeFP(uint hex, uint E, uint M){ - uint width = M + E + 1u; - uint bias = 128u - (1u << (E - 1u)); - uint exponent = (hex >> M) & ((1u << E) - 1u); - uint mantissa = hex & ((1u << M) - 1u); - uint sign = (hex >> (E + M)) << 31u; - - if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { - if (exponent == (1u << E) - 1u) exponent = 255u; - else exponent += bias; - hex = sign | (mantissa << (23u - M)) | (exponent << 23u); - } else { - hex = sign; - } - - return uintBitsToFloat(hex); - } - - // Implements the following algorthm: https://mathb.in/26766 - void calcLighting(out vec4 primary_color, out vec4 secondary_color){ - // Quaternions describe a transformation from surface-local space to eye space. - // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), - // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). - vec3 normal = normalize(v_normal ); - vec3 tangent = normalize(v_tangent ); - vec3 bitangent = normalize(v_bitangent); - vec3 view = normalize(v_view); - - uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); - if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){ - primary_color = secondary_color = vec4(1.0); - return; - } - - uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); - uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1; - uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9); - - primary_color = vec4(vec3(0.0),1.0); - secondary_color = vec4(vec3(0.0),1.0); - - primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); - - uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); - uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); - uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3); - uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4); - uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2); - float d[7]; - - bool error_unimpl = false; - - for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { - uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); - - uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); - uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id); - uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id); - uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id); - uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id); - uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id); - uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id); - - vec3 light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10) - )); - - // Positional Light - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) - error_unimpl = true; - - vec3 half_vector = normalize(normalize(light_vector) + view); - - for (int c = 0; c < 7; c++) { - if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); - float scale = float(1u << scale_id); - if (scale_id >= 6u) - scale/=256.0; - - uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); - if (input_id == 0u) d[c] = dot(normal,half_vector); - else if (input_id == 1u) d[c] = dot(view,half_vector); - else if (input_id == 2u) d[c] = dot(normal,view); - else if (input_id == 3u) d[c] = dot(light_vector,normal); - else if (input_id == 4u){ - uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id); - uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id); - vec3 spot_light_vector = normalize(vec3( - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11), - decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11) - )); - d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); - } else if (input_id == 5u) { - d[c] = 1.0; // TODO: cos (aka CP); - error_unimpl = true; - } else { - d[c] = 1.0; - } - - d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale; - if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) - d[c] = abs(d[c]); - } else { - d[c] = 1.0; - } - } - - uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); - if (lookup_config == 0) { - d[D1_LUT] = 0.0; - d[FR_LUT] = 0.0; - d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 1) { - d[D0_LUT] = 0.0; - d[D1_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 2) { - d[FR_LUT] = 0.0; - d[SP_LUT] = 0.0; - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } else if (lookup_config == 3) { - d[SP_LUT] = 0.0; - d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; - } else if (lookup_config == 4) { - d[FR_LUT] = 0.0; - } else if (lookup_config == 5) { - d[D1_LUT] = 0.0; - } else if (lookup_config == 6) { - d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; - } - - float distance_factor = 1.0; // a - float indirect_factor = 1.0; // fi - float shadow_factor = 1.0; // o - - float NdotL = dot(normal, light_vector); //Li dot N - - // Two sided diffuse - if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL); - else NdotL = abs(NdotL); - - float light_factor = distance_factor*d[SP_LUT]*indirect_factor*shadow_factor; - - primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL); - secondary_color.rgb += light_factor * ( - regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + - regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]) - ); - } - uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); - uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); - - if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; - if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; - - if (error_unimpl) { - secondary_color = primary_color = vec4(1.0,0.,1.0,1.0); - } - } - - void main() { - // TODO: what do invalid sources and disabled textures read as? - // And what does the "previous combiner" source read initially? - tevSources[0] = v_colour; // Primary/vertex color - calcLighting(tevSources[1],tevSources[2]); - - uint textureConfig = readPicaReg(0x80); - vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2; - - if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); - if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); - if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); - tevSources[13] = vec4(0.0); // Previous buffer - tevSources[15] = vec4(0.0); // Previous combiner - - tevNextPreviousBuffer = v_textureEnvBufferColor; - uint textureEnvUpdateBuffer = readPicaReg(0xE0); - - for (int i = 0; i < 6; i++) { - tevSources[14] = v_textureEnvColor[i]; // Constant color - tevSources[15] = tevCalculateCombiner(i); - tevSources[13] = tevNextPreviousBuffer; - - if (i < 4) { - if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { - tevNextPreviousBuffer.rgb = tevSources[15].rgb; - } - - if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { - tevNextPreviousBuffer.a = tevSources[15].a; - } - } - } - - fragColour = tevSources[15]; - - if (tevUnimplementedSourceFlag) { - // fragColour = vec4(1.0, 0.0, 1.0, 1.0); - } - // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; - - - // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] - // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] - float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; - float depth = z_over_w * u_depthScale + u_depthOffset; - - if (!u_depthmapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering) - depth /= gl_FragCoord.w; - - // Write final fragment depth - gl_FragDepth = depth; - - // Perform alpha test - uint alphaControl = readPicaReg(0x104); - if ((alphaControl & 1u) != 0u) { // Check if alpha test is on - uint func = (alphaControl >> 4u) & 7u; - float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; - float alpha = fragColour.a; - - switch (func) { - case 0: discard; // Never pass alpha test - case 1: break; // Always pass alpha test - case 2: // Pass if equal - if (alpha != reference) - discard; - break; - case 3: // Pass if not equal - if (alpha == reference) - discard; - break; - case 4: // Pass if less than - if (alpha >= reference) - discard; - break; - case 5: // Pass if less than or equal - if (alpha > reference) - discard; - break; - case 6: // Pass if greater than - if (alpha <= reference) - discard; - break; - case 7: // Pass if greater than or equal - if (alpha < reference) - discard; - break; - } - } - } -)"; - -const char* displayVertexShader = R"( - #version 410 core - out vec2 UV; - - void main() { - const vec4 positions[4] = vec4[]( - vec4(-1.0, 1.0, 1.0, 1.0), // Top-left - vec4(1.0, 1.0, 1.0, 1.0), // Top-right - vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left - vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right - ); - - // The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise - // So we adjust our texcoords accordingly - const vec2 texcoords[4] = vec2[]( - vec2(1.0, 1.0), // Top-right - vec2(1.0, 0.0), // Bottom-right - vec2(0.0, 1.0), // Top-left - vec2(0.0, 0.0) // Bottom-left - ); - - gl_Position = positions[gl_VertexID]; - UV = texcoords[gl_VertexID]; - } -)"; - -const char* displayFragmentShader = R"( - #version 410 core - in vec2 UV; - out vec4 FragColor; - - uniform sampler2D u_texture; - void main() { - FragColor = texture(u_texture, UV); - } -)"; - void RendererGL::reset() { depthBufferCache.reset(); colourBufferCache.reset(); @@ -611,8 +46,13 @@ void RendererGL::reset() { void RendererGL::initGraphicsContext() { gl.reset(); - OpenGL::Shader vert(vertexShader, OpenGL::Vertex); - OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); + auto gl_resources = cmrc::RendererGL::get_filesystem(); + + auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert"); + auto fragmentShaderSource = gl_resources.open("opengl_fragment_shader.frag"); + + OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex); + OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment); triangleProgram.create({vert, frag}); gl.useProgram(triangleProgram); @@ -633,8 +73,11 @@ void RendererGL::initGraphicsContext() { glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); - OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); - OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); + auto displayVertexShaderSource = gl_resources.open("opengl_display.vert"); + auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag"); + + OpenGL::Shader vertDisplay({displayVertexShaderSource.begin(), displayVertexShaderSource.size()}, OpenGL::Vertex); + OpenGL::Shader fragDisplay({displayFragmentShaderSource.begin(), displayFragmentShaderSource.size()}, OpenGL::Fragment); displayProgram.create({vertDisplay, fragDisplay}); gl.useProgram(displayProgram); diff --git a/src/host_shaders/opengl_display.frag b/src/host_shaders/opengl_display.frag new file mode 100644 index 00000000..1a8d2275 --- /dev/null +++ b/src/host_shaders/opengl_display.frag @@ -0,0 +1,6 @@ +#version 410 core +in vec2 UV; +out vec4 FragColor; + +uniform sampler2D u_texture; +void main() { FragColor = texture(u_texture, UV); } \ No newline at end of file diff --git a/src/host_shaders/opengl_display.vert b/src/host_shaders/opengl_display.vert new file mode 100644 index 00000000..990e2f80 --- /dev/null +++ b/src/host_shaders/opengl_display.vert @@ -0,0 +1,23 @@ +#version 410 core +out vec2 UV; + +void main() { + const vec4 positions[4] = vec4[]( + vec4(-1.0, 1.0, 1.0, 1.0), // Top-left + vec4(1.0, 1.0, 1.0, 1.0), // Top-right + vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left + vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right + ); + + // The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise + // So we adjust our texcoords accordingly + const vec2 texcoords[4] = vec2[]( + vec2(1.0, 1.0), // Top-right + vec2(1.0, 0.0), // Bottom-right + vec2(0.0, 1.0), // Top-left + vec2(0.0, 0.0) // Bottom-left + ); + + gl_Position = positions[gl_VertexID]; + UV = texcoords[gl_VertexID]; +} \ No newline at end of file diff --git a/src/host_shaders/opengl_fragment_shader.frag b/src/host_shaders/opengl_fragment_shader.frag new file mode 100644 index 00000000..f6461094 --- /dev/null +++ b/src/host_shaders/opengl_fragment_shader.frag @@ -0,0 +1,409 @@ +#version 410 core + +in vec3 v_tangent; +in vec3 v_normal; +in vec3 v_bitangent; +in vec4 v_colour; +in vec3 v_texcoord0; +in vec2 v_texcoord1; +in vec3 v_view; +in vec2 v_texcoord2; +flat in vec4 v_textureEnvColor[6]; +flat in vec4 v_textureEnvBufferColor; + +out vec4 fragColour; + +// TEV uniforms +uniform uint u_textureEnvSource[6]; +uniform uint u_textureEnvOperand[6]; +uniform uint u_textureEnvCombiner[6]; +uniform uint u_textureEnvScale[6]; + +// Depth control uniforms +uniform float u_depthScale; +uniform float u_depthOffset; +uniform bool u_depthmapEnable; + +uniform sampler2D u_tex0; +uniform sampler2D u_tex1; +uniform sampler2D u_tex2; +uniform sampler1DArray u_tex_lighting_lut; + +uniform uint u_picaRegs[0x200 - 0x48]; + +// Helper so that the implementation of u_pica_regs can be changed later +uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48]; } + +vec4 tevSources[16]; +vec4 tevNextPreviousBuffer; +bool tevUnimplementedSourceFlag = false; + +// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements): +// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml + +vec4 tevFetchSource(uint src_id) { + if (src_id >= 6u && src_id < 13u) { + tevUnimplementedSourceFlag = true; + } + + return tevSources[src_id]; +} + +vec4 tevGetColorAndAlphaSource(int tev_id, int src_id) { + vec4 result; + + vec4 colorSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4)) & 15u); + vec4 alphaSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u); + + uint colorOperand = (u_textureEnvOperand[tev_id] >> (src_id * 4)) & 15u; + uint alphaOperand = (u_textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u; + + // TODO: figure out what the undocumented values do + switch (colorOperand) { + case 0u: result.rgb = colorSource.rgb; break; // Source color + case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color + case 2u: result.rgb = vec3(colorSource.a); break; // Source alpha + case 3u: result.rgb = vec3(1.0 - colorSource.a); break; // One minus source alpha + case 4u: result.rgb = vec3(colorSource.r); break; // Source red + case 5u: result.rgb = vec3(1.0 - colorSource.r); break; // One minus source red + case 8u: result.rgb = vec3(colorSource.g); break; // Source green + case 9u: result.rgb = vec3(1.0 - colorSource.g); break; // One minus source green + case 12u: result.rgb = vec3(colorSource.b); break; // Source blue + case 13u: result.rgb = vec3(1.0 - colorSource.b); break; // One minus source blue + default: break; + } + + // TODO: figure out what the undocumented values do + switch (alphaOperand) { + case 0u: result.a = alphaSource.a; break; // Source alpha + case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha + case 2u: result.a = alphaSource.r; break; // Source red + case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red + case 4u: result.a = alphaSource.g; break; // Source green + case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green + case 6u: result.a = alphaSource.b; break; // Source blue + case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue + default: break; + } + + return result; +} + +vec4 tevCalculateCombiner(int tev_id) { + vec4 source0 = tevGetColorAndAlphaSource(tev_id, 0); + vec4 source1 = tevGetColorAndAlphaSource(tev_id, 1); + vec4 source2 = tevGetColorAndAlphaSource(tev_id, 2); + + uint colorCombine = u_textureEnvCombiner[tev_id] & 15u; + uint alphaCombine = (u_textureEnvCombiner[tev_id] >> 16) & 15u; + + vec4 result = vec4(1.0); + + // TODO: figure out what the undocumented values do + switch (colorCombine) { + case 0u: result.rgb = source0.rgb; break; // Replace + case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate + case 2u: result.rgb = min(vec3(1.0), source0.rgb + source1.rgb); break; // Add + case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate + case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract + case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGB + case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5, source1.rgb - 0.5)); break; // Dot3 RGBA + case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add + case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply + default: break; + } + + if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode. + // TODO: figure out what the undocumented values do + // TODO: test if the alpha combiner supports all the same modes as the color combiner. + switch (alphaCombine) { + case 0u: result.a = source0.a; break; // Replace + case 1u: result.a = source0.a * source1.a; break; // Modulate + case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add + case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed + case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate + case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract + case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add + case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply + default: break; + } + } + + result.rgb *= float(1 << (u_textureEnvScale[tev_id] & 3u)); + result.a *= float(1 << ((u_textureEnvScale[tev_id] >> 16) & 3u)); + + return result; +} + +#define D0_LUT 0u +#define D1_LUT 1u +#define SP_LUT 2u +#define FR_LUT 3u +#define RB_LUT 4u +#define RG_LUT 5u +#define RR_LUT 6u + +float lutLookup(uint lut, uint light, float value) { + if (lut >= FR_LUT && lut <= RR_LUT) lut -= 1; + if (lut == SP_LUT) lut = light + 8; + return texture(u_tex_lighting_lut, vec2(value, lut)).r; +} + +vec3 regToColor(uint reg) { + // Normalization scale to convert from [0...255] to [0.0...1.0] + const float scale = 1.0 / 255.0; + + return scale * vec3(float(bitfieldExtract(reg, 20, 8)), float(bitfieldExtract(reg, 10, 8)), float(bitfieldExtract(reg, 00, 8))); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return uintBitsToFloat(hex); +} + +// Implements the following algorthm: https://mathb.in/26766 +void calcLighting(out vec4 primary_color, out vec4 secondary_color) { + // Quaternions describe a transformation from surface-local space to eye space. + // In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1), + // the tangent vector is (1,0,0), and the bitangent vector is (0,1,0). + vec3 normal = normalize(v_normal); + vec3 tangent = normalize(v_tangent); + vec3 bitangent = normalize(v_bitangent); + vec3 view = normalize(v_view); + + uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F); + if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0) { + primary_color = secondary_color = vec4(1.0); + return; + } + + uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0); + uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) + 1; + uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9); + + primary_color = vec4(vec3(0.0), 1.0); + secondary_color = vec4(vec3(0.0), 1.0); + + primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); + + uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); + uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); + uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3); + uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4); + uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2); + float d[7]; + + bool error_unimpl = false; + + for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) { + uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION, int(i * 3), 3); + + uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); + uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id); + uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id); + uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id); + uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id); + uint GPUREG_LIGHTi_VECTOR_HIGH = readPicaReg(0x0145 + 0x10 * light_id); + uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id); + + vec3 light_vector = normalize(vec3( + decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10), decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10) + )); + + // Positional Light + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) error_unimpl = true; + + vec3 half_vector = normalize(normalize(light_vector) + view); + + for (int c = 0; c < 7; c++) { + if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0) { + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); + float scale = float(1u << scale_id); + if (scale_id >= 6u) scale /= 256.0; + + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3); + if (input_id == 0u) + d[c] = dot(normal, half_vector); + else if (input_id == 1u) + d[c] = dot(view, half_vector); + else if (input_id == 2u) + d[c] = dot(normal, view); + else if (input_id == 3u) + d[c] = dot(light_vector, normal); + else if (input_id == 4u) { + uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id); + uint GPUREG_LIGHTi_SPOTDIR_HIGH = readPicaReg(0x0147 + 0x10 * light_id); + vec3 spot_light_vector = normalize(vec3( + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11), + decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11) + )); + d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP); + } else if (input_id == 5u) { + d[c] = 1.0; // TODO: cos (aka CP); + error_unimpl = true; + } else { + d[c] = 1.0; + } + + d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale; + if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) d[c] = abs(d[c]); + } else { + d[c] = 1.0; + } + } + + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG, 4, 4); + if (lookup_config == 0) { + d[D1_LUT] = 0.0; + d[FR_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 1) { + d[D0_LUT] = 0.0; + d[D1_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 2) { + d[FR_LUT] = 0.0; + d[SP_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } else if (lookup_config == 3) { + d[SP_LUT] = 0.0; + d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0; + } else if (lookup_config == 4) { + d[FR_LUT] = 0.0; + } else if (lookup_config == 5) { + d[D1_LUT] = 0.0; + } else if (lookup_config == 6) { + d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; + } + + float distance_factor = 1.0; // a + float indirect_factor = 1.0; // fi + float shadow_factor = 1.0; // o + + float NdotL = dot(normal, light_vector); // Li dot N + + // Two sided diffuse + if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) + NdotL = max(0.0, NdotL); + else + NdotL = abs(NdotL); + + float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor; + + primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE) * NdotL); + secondary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] + + regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])); + } + uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1); + uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1); + + if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT]; + if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT]; + + if (error_unimpl) { + secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0); + } +} + +void main() { + // TODO: what do invalid sources and disabled textures read as? + // And what does the "previous combiner" source read initially? + tevSources[0] = v_colour; // Primary/vertex color + calcLighting(tevSources[1], tevSources[2]); + + uint textureConfig = readPicaReg(0x80); + vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2; + + if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); + if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); + if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV); + tevSources[13] = vec4(0.0); // Previous buffer + tevSources[15] = vec4(0.0); // Previous combiner + + tevNextPreviousBuffer = v_textureEnvBufferColor; + uint textureEnvUpdateBuffer = readPicaReg(0xE0); + + for (int i = 0; i < 6; i++) { + tevSources[14] = v_textureEnvColor[i]; // Constant color + tevSources[15] = tevCalculateCombiner(i); + tevSources[13] = tevNextPreviousBuffer; + + if (i < 4) { + if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) { + tevNextPreviousBuffer.rgb = tevSources[15].rgb; + } + + if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { + tevNextPreviousBuffer.a = tevSources[15].a; + } + } + } + + fragColour = tevSources[15]; + + if (tevUnimplementedSourceFlag) { + // fragColour = vec4(1.0, 0.0, 1.0, 1.0); + } + // fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * u_depthScale + u_depthOffset; + + if (!u_depthmapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering) + depth /= gl_FragCoord.w; + + // Write final fragment depth + gl_FragDepth = depth; + + // Perform alpha test + uint alphaControl = readPicaReg(0x104); + if ((alphaControl & 1u) != 0u) { // Check if alpha test is on + uint func = (alphaControl >> 4u) & 7u; + float reference = float((alphaControl >> 8u) & 0xffu) / 255.0; + float alpha = fragColour.a; + + switch (func) { + case 0: discard; // Never pass alpha test + case 1: break; // Always pass alpha test + case 2: // Pass if equal + if (alpha != reference) discard; + break; + case 3: // Pass if not equal + if (alpha == reference) discard; + break; + case 4: // Pass if less than + if (alpha >= reference) discard; + break; + case 5: // Pass if less than or equal + if (alpha > reference) discard; + break; + case 6: // Pass if greater than + if (alpha <= reference) discard; + break; + case 7: // Pass if greater than or equal + if (alpha < reference) discard; + break; + } + } +} \ No newline at end of file diff --git a/src/host_shaders/opengl_vertex_shader.vert b/src/host_shaders/opengl_vertex_shader.vert new file mode 100644 index 00000000..cbf992c4 --- /dev/null +++ b/src/host_shaders/opengl_vertex_shader.vert @@ -0,0 +1,97 @@ +#version 410 core + +layout(location = 0) in vec4 a_coords; +layout(location = 1) in vec4 a_quaternion; +layout(location = 2) in vec4 a_vertexColour; +layout(location = 3) in vec2 a_texcoord0; +layout(location = 4) in vec2 a_texcoord1; +layout(location = 5) in float a_texcoord0_w; +layout(location = 6) in vec3 a_view; +layout(location = 7) in vec2 a_texcoord2; + +out vec3 v_normal; +out vec3 v_tangent; +out vec3 v_bitangent; +out vec4 v_colour; +out vec3 v_texcoord0; +out vec2 v_texcoord1; +out vec3 v_view; +out vec2 v_texcoord2; +flat out vec4 v_textureEnvColor[6]; +flat out vec4 v_textureEnvBufferColor; + +out float gl_ClipDistance[2]; + +// TEV uniforms +uniform uint u_textureEnvColor[6]; +uniform uint u_picaRegs[0x200 - 0x48]; + +// Helper so that the implementation of u_pica_regs can be changed later +uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48]; } + +vec4 abgr8888ToVec4(uint abgr) { + const float scale = 1.0 / 255.0; + + return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); +} + +vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); +} + +// Convert an arbitrary-width floating point literal to an f32 +float decodeFP(uint hex, uint E, uint M) { + uint width = M + E + 1u; + uint bias = 128u - (1u << (E - 1u)); + uint exponent = (hex >> M) & ((1u << E) - 1u); + uint mantissa = hex & ((1u << M) - 1u); + uint sign = (hex >> (E + M)) << 31u; + + if ((hex & ((1u << (width - 1u)) - 1u)) != 0) { + if (exponent == (1u << E) - 1u) + exponent = 255u; + else + exponent += bias; + hex = sign | (mantissa << (23u - M)) | (exponent << 23u); + } else { + hex = sign; + } + + return uintBitsToFloat(hex); +} + +void main() { + gl_Position = a_coords; + v_colour = a_vertexColour; + + // Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + + for (int i = 0; i < 6; i++) { + v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); + } + + v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD)); + + // Parse clipping plane registers + // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 + // With n = (A, B, C) being the normal vector and D being the origin point distance + // Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1] + vec4 clipData = vec4( + decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16), decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16), + decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16), decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16) + ); + + // There's also another, always-on clipping plane based on vertex z + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipData, a_coords); +} \ No newline at end of file