diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a74728d..c52ccd51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,7 +198,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp - src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp + src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp ) set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp) @@ -245,10 +245,11 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp - include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp + include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp - include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp + include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp + include/PICA/pica_frag_uniforms.hpp ) cmrc_add_resource_library( diff --git a/include/PICA/pica_frag_config.hpp 
b/include/PICA/pica_frag_config.hpp new file mode 100644 index 00000000..59f13757 --- /dev/null +++ b/include/PICA/pica_frag_config.hpp @@ -0,0 +1,52 @@ +#pragma once +#include +#include +#include +#include + +#include "PICA/pica_hash.hpp" +#include "PICA/regs.hpp" +#include "bitfield.hpp" +#include "helpers.hpp" + +namespace PICA { + struct OutputConfig { + union { + u32 raw; + // Merge the enable + compare function into 1 field to avoid duplicate shaders + // enable == off means a CompareFunction of Always + BitField<0, 3, CompareFunction> alphaTestFunction; + BitField<4, 1, u32> depthMapEnable; + }; + }; + + struct TextureConfig { + u32 texUnitConfig; + u32 texEnvUpdateBuffer; + + // There's 6 TEV stages, and each one is configured via 5 word-sized registers + std::array tevConfigs; + }; + + // Config used for identifying unique fragment pipeline configurations + struct FragmentConfig { + OutputConfig outConfig; + TextureConfig texConfig; + + bool operator==(const FragmentConfig& config) const { + // Hash function and equality operator required by std::unordered_map + return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0; + } + }; + + static_assert( + std::has_unique_object_representations() && std::has_unique_object_representations() && + std::has_unique_object_representations() + ); +} // namespace PICA + +// Override std::hash for our fragment config class +template <> +struct std::hash { + std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); } +}; \ No newline at end of file diff --git a/include/PICA/pica_frag_uniforms.hpp b/include/PICA/pica_frag_uniforms.hpp new file mode 100644 index 00000000..332acd4e --- /dev/null +++ b/include/PICA/pica_frag_uniforms.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include + +#include "helpers.hpp" + +namespace PICA { + struct FragmentUniforms { + using vec3 = std::array; + using vec4 = std::array; + static constexpr usize 
tevStageCount = 6; + + s32 alphaReference; + float depthScale; + float depthOffset; + + alignas(16) vec4 constantColors[tevStageCount]; + alignas(16) vec4 tevBufferColor; + alignas(16) vec4 clipCoords; + }; +} // namespace PICA \ No newline at end of file diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 4342ebe5..74f8c7d5 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -345,4 +345,131 @@ namespace PICA { GeometryPrimitive = 3, }; + enum class CompareFunction : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + Less = 4, + LessOrEqual = 5, + Greater = 6, + GreaterOrEqual = 7, + }; + + struct TexEnvConfig { + enum class Source : u8 { + PrimaryColor = 0x0, + PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, + Texture0 = 0x3, + Texture1 = 0x4, + Texture2 = 0x5, + Texture3 = 0x6, + // TODO: Inbetween values are unknown + PreviousBuffer = 0xD, + Constant = 0xE, + Previous = 0xF, + }; + + enum class ColorOperand : u8 { + SourceColor = 0x0, + OneMinusSourceColor = 0x1, + SourceAlpha = 0x2, + OneMinusSourceAlpha = 0x3, + SourceRed = 0x4, + OneMinusSourceRed = 0x5, + // TODO: Inbetween values are unknown + SourceGreen = 0x8, + OneMinusSourceGreen = 0x9, + // Inbetween values are unknown + SourceBlue = 0xC, + OneMinusSourceBlue = 0xD, + }; + + enum class AlphaOperand : u8 { + SourceAlpha = 0x0, + OneMinusSourceAlpha = 0x1, + SourceRed = 0x2, + OneMinusSourceRed = 0x3, + SourceGreen = 0x4, + OneMinusSourceGreen = 0x5, + SourceBlue = 0x6, + OneMinusSourceBlue = 0x7, + }; + + enum class Operation : u8 { + Replace = 0, + Modulate = 1, + Add = 2, + AddSigned = 3, + Lerp = 4, + Subtract = 5, + Dot3RGB = 6, + Dot3RGBA = 7, + MultiplyAdd = 8, + AddMultiply = 9, + }; + + // RGB sources + Source colorSource1, colorSource2, colorSource3; + // Alpha sources + Source alphaSource1, alphaSource2, alphaSource3; + + // RGB operands + ColorOperand colorOperand1, colorOperand2, colorOperand3; + // Alpha operands + AlphaOperand 
alphaOperand1, alphaOperand2, alphaOperand3; + + // Texture environment operations for this stage + Operation colorOp, alphaOp; + + u32 constColor; + + private: + // These are the only private members since their value doesn't actually reflect the scale + // So we make them private so we'll always use the appropriate member functions instead + u8 colorScale; + u8 alphaScale; + + public: + // Create texture environment object from TEV registers + TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) { + colorSource1 = Helpers::getBits<0, 4, Source>(source); + colorSource2 = Helpers::getBits<4, 4, Source>(source); + colorSource3 = Helpers::getBits<8, 4, Source>(source); + + alphaSource1 = Helpers::getBits<16, 4, Source>(source); + alphaSource2 = Helpers::getBits<20, 4, Source>(source); + alphaSource3 = Helpers::getBits<24, 4, Source>(source); + + colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand); + colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand); + colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand); + + alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand); + alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand); + alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand); + + colorOp = Helpers::getBits<0, 4, Operation>(combiner); + alphaOp = Helpers::getBits<16, 4, Operation>(combiner); + + colorScale = Helpers::getBits<0, 2>(scale); + alphaScale = Helpers::getBits<16, 2>(scale); + } + + u32 getColorScale() const { return (colorScale <= 2) ? (1 << colorScale) : 1; } + u32 getAlphaScale() const { return (alphaScale <= 2) ?
(1 << alphaScale) : 1; } + + bool isPassthroughStage() { + // clang-format off + // Thank you to the Citra dev that wrote this out + return ( + colorOp == Operation::Replace && alphaOp == Operation::Replace && + colorSource1 == Source::Previous && alphaSource1 == Source::Previous && + colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha && + getColorScale() == 1 && getAlphaScale() == 1 + ); + // clang-format on + } + }; } // namespace PICA diff --git a/include/PICA/shader_gen.hpp b/include/PICA/shader_gen.hpp new file mode 100644 index 00000000..e8e8ca20 --- /dev/null +++ b/include/PICA/shader_gen.hpp @@ -0,0 +1,41 @@ +#pragma once +#include + +#include "PICA/gpu.hpp" +#include "PICA/regs.hpp" +#include "helpers.hpp" + +namespace PICA::ShaderGen { + // Graphics API this shader is targetting + enum class API { GL, GLES, Vulkan }; + + // Shading language to use (Only GLSL for the time being) + enum class Language { GLSL }; + + class FragmentGenerator { + using PICARegs = std::array; + API api; + Language language; + + void compileTEV(std::string& shader, int stage, const PICARegs& regs); + void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index); + void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index); + void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index); + void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op); + + void applyAlphaTest(std::string& shader, const PICARegs& regs); + + u32 textureConfig = 0; + + public: + FragmentGenerator(API api, Language language) : api(api), language(language) {} + std::string generate(const PICARegs& regs); + std::string getVertexShader(const PICARegs& regs); + + void setTarget(API api, Language language) { + this->api = 
api; + this->language = language; + } + }; +}; // namespace PICA::ShaderGen \ No newline at end of file diff --git a/include/config.hpp b/include/config.hpp index 6dbae9e3..8aa695aa 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -13,8 +13,11 @@ struct EmulatorConfig { static constexpr bool shaderJitDefault = false; #endif + static constexpr bool ubershaderDefault = true; + bool shaderJitEnabled = shaderJitDefault; bool discordRpcEnabled = false; + bool useUbershaders = ubershaderDefault; bool accurateShaderMul = false; RendererType rendererType = RendererType::OpenGL; Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null; diff --git a/include/renderer.hpp b/include/renderer.hpp index 17812bcf..e64d49e3 100644 --- a/include/renderer.hpp +++ b/include/renderer.hpp @@ -74,6 +74,8 @@ class Renderer { virtual std::string getUbershader() { return ""; } virtual void setUbershader(const std::string& shader) {} + virtual void setUbershaderSetting(bool value) {} + // Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window #ifdef PANDA3DS_FRONTEND_QT virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); } diff --git a/include/renderer_gl/gl_state.hpp b/include/renderer_gl/gl_state.hpp index 69960f1e..e5591ea0 100644 --- a/include/renderer_gl/gl_state.hpp +++ b/include/renderer_gl/gl_state.hpp @@ -40,9 +40,13 @@ struct GLStateManager { GLuint boundVAO; GLuint boundVBO; GLuint currentProgram; + GLuint boundUBO; GLenum depthFunc; GLenum logicOp; + GLenum blendEquationRGB, blendEquationAlpha; + GLenum blendFuncSourceRGB, blendFuncSourceAlpha; + GLenum blendFuncDestRGB, blendFuncDestAlpha; void reset(); void resetBlend(); @@ -51,7 +55,7 @@ struct GLStateManager { void resetColourMask(); void resetDepth(); void resetVAO(); - void resetVBO(); + void resetBuffers(); void resetProgram(); void resetScissor(); void 
resetStencil(); @@ -183,6 +187,13 @@ struct GLStateManager { } } + void bindUBO(GLuint handle) { + if (boundUBO != handle) { + boundUBO = handle; + glBindBuffer(GL_UNIFORM_BUFFER, boundUBO); + } + } + void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); } void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); } void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); } @@ -224,6 +235,41 @@ struct GLStateManager { } void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast(func)); } + + // Counterpart to glBlendEquationSeparate + void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) { + if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) { + blendEquationRGB = modeRGB; + blendEquationAlpha = modeAlpha; + + glBlendEquationSeparate(modeRGB, modeAlpha); + } + } + + // Counterpart to glBlendFuncSeparate + void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) { + if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha || + blendFuncDestAlpha != destAlpha) { + + blendFuncSourceRGB = sourceRGB; + blendFuncDestRGB = destRGB; + blendFuncSourceAlpha = sourceAlpha; + blendFuncDestAlpha = destAlpha; + + glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha); + } + } + + // Counterpart to regular glBlendEquation + void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); } + + void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) { + setBlendEquation(static_cast(modeRGB), static_cast(modeAlpha)); + } + + void setBlendEquation(OpenGL::BlendEquation mode) { + setBlendEquation(static_cast(mode)); + } }; static_assert(std::is_trivially_constructible(), "OpenGL State Manager class is not trivially constructible!"); diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index c947583e..6414a7cf 100644 --- 
a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -1,11 +1,17 @@ #pragma once #include +#include +#include #include +#include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_config.hpp" +#include "PICA/pica_hash.hpp" #include "PICA/pica_vertex.hpp" #include "PICA/regs.hpp" +#include "PICA/shader_gen.hpp" #include "gl_state.hpp" #include "helpers.hpp" #include "logger.hpp" @@ -24,21 +30,25 @@ class RendererGL final : public Renderer { OpenGL::VertexArray vao; OpenGL::VertexBuffer vbo; + bool usingUbershader = true; - // TEV configuration uniform locations - GLint textureEnvSourceLoc = -1; - GLint textureEnvOperandLoc = -1; - GLint textureEnvCombinerLoc = -1; - GLint textureEnvColorLoc = -1; - GLint textureEnvScaleLoc = -1; + // Data + struct { + // TEV configuration uniform locations + GLint textureEnvSourceLoc = -1; + GLint textureEnvOperandLoc = -1; + GLint textureEnvCombinerLoc = -1; + GLint textureEnvColorLoc = -1; + GLint textureEnvScaleLoc = -1; - // Uniform of PICA registers - GLint picaRegLoc = -1; + // Uniform of PICA registers + GLint picaRegLoc = -1; - // Depth configuration uniform locations - GLint depthOffsetLoc = -1; - GLint depthScaleLoc = -1; - GLint depthmapEnableLoc = -1; + // Depth configuration uniform locations + GLint depthOffsetLoc = -1; + GLint depthScaleLoc = -1; + GLint depthmapEnableLoc = -1; + } ubershaderData; float oldDepthScale = -1.0; float oldDepthOffset = 0.0; @@ -57,21 +67,31 @@ class RendererGL final : public Renderer { OpenGL::Framebuffer screenFramebuffer; OpenGL::Texture blankTexture; + // Cached recompiled fragment shader + struct CachedProgram { + OpenGL::Program program; + uint uboBinding; + }; + std::unordered_map shaderCache; + OpenGL::Framebuffer getColourFBO(); OpenGL::Texture getTexture(Texture& tex); + OpenGL::Program& getSpecializedShader(); + + PICA::ShaderGen::FragmentGenerator fragShaderGen; MAKE_LOG_FUNCTION(log, rendererLogger) void setupBlending(); void 
setupStencilTest(bool stencilEnable); void bindDepthBuffer(); - void setupTextureEnvState(); + void setupUbershaderTexEnv(); void bindTexturesToSlots(); void updateLightingLUT(); void initGraphicsContextInternal(); public: RendererGL(GPU& gpu, const std::array& internalRegs, const std::array& externalRegs) - : Renderer(gpu, internalRegs, externalRegs) {} + : Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {} ~RendererGL() override; void reset() override; @@ -87,6 +107,8 @@ class RendererGL final : public Renderer { virtual std::string getUbershader() override; virtual void setUbershader(const std::string& shader) override; + virtual void setUbershaderSetting(bool value) override { usingUbershader = value; } + std::optional getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true); // Note: The caller is responsible for deleting the currently bound FBO before calling this @@ -100,4 +122,4 @@ class RendererGL final : public Renderer { // Take a screenshot of the screen and store it in a file void screenshot(const std::string& name) override; -}; +}; \ No newline at end of file diff --git a/src/config.cpp b/src/config.cpp index 5af4d654..cc34d148 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -62,6 +62,7 @@ void EmulatorConfig::load() { shaderJitEnabled = toml::find_or(gpu, "EnableShaderJIT", shaderJitDefault); vsyncEnabled = toml::find_or(gpu, "EnableVSync", true); + useUbershaders = toml::find_or(gpu, "UseUbershaders", ubershaderDefault); accurateShaderMul = toml::find_or(gpu, "AccurateShaderMultiplication", false); } } @@ -123,10 +124,13 @@ void EmulatorConfig::save() { data["General"]["EnableDiscordRPC"] = discordRpcEnabled; data["General"]["UsePortableBuild"] = usePortableBuild; data["General"]["DefaultRomPath"] = defaultRomPath.string(); + data["GPU"]["EnableShaderJIT"] = shaderJitEnabled; data["GPU"]["Renderer"] = 
std::string(Renderer::typeToString(rendererType)); data["GPU"]["EnableVSync"] = vsyncEnabled; data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul; + data["GPU"]["UseUbershaders"] = useUbershaders; + data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType)); data["Audio"]["EnableAudio"] = audioEnabled; diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index ed0e5420..a54fe6eb 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -110,6 +110,7 @@ void GPU::reset() { externalRegs[Framebuffer1Config] = static_cast(PICA::ColorFmt::RGB8); externalRegs[Framebuffer1Select] = 0; + renderer->setUbershaderSetting(config.useUbershaders); renderer->reset(); } diff --git a/src/core/PICA/shader_gen_glsl.cpp b/src/core/PICA/shader_gen_glsl.cpp new file mode 100644 index 00000000..e135ac8e --- /dev/null +++ b/src/core/PICA/shader_gen_glsl.cpp @@ -0,0 +1,434 @@ +#include "PICA/shader_gen.hpp" +using namespace PICA; +using namespace PICA::ShaderGen; + +static constexpr const char* uniformDefinition = R"( + layout(std140) uniform FragmentUniforms { + int alphaReference; + float depthScale; + float depthOffset; + + vec4 constantColors[6]; + vec4 tevBufferColor; + vec4 clipCoords; + }; +)"; + +std::string FragmentGenerator::getVertexShader(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + + ret += uniformDefinition; + + ret += R"( + layout(location = 0) in vec4 a_coords; + layout(location = 1) in vec4 a_quaternion; + layout(location = 2) in vec4 a_vertexColour; + layout(location = 3) in vec2 a_texcoord0; + layout(location = 4) in vec2 a_texcoord1; + layout(location = 5) in float a_texcoord0_w; + layout(location = 6) in vec3 a_view; + layout(location = 7) in vec2 
a_texcoord2; + + out vec3 v_normal; + out vec3 v_tangent; + out vec3 v_bitangent; + out vec4 v_colour; + out vec3 v_texcoord0; + out vec2 v_texcoord1; + out vec3 v_view; + out vec2 v_texcoord2; + + #ifndef USING_GLES + out float gl_ClipDistance[2]; + #endif + + vec4 abgr8888ToVec4(uint abgr) { + const float scale = 1.0 / 255.0; + return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24)); + } + + vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) { + vec3 u = q.xyz; + float s = q.w; + return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v); + } + + void main() { + gl_Position = a_coords; + vec4 colourAbs = abs(a_vertexColour); + v_colour = min(colourAbs, vec4(1.f)); + + v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w); + v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y); + v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y); + v_view = a_view; + + v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion)); + v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion)); + v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion)); + + #ifndef USING_GLES + gl_ClipDistance[0] = -a_coords.z; + gl_ClipDistance[1] = dot(clipCoords, a_coords); + #endif + } +)"; + + return ret; +} + +std::string FragmentGenerator::generate(const PICARegs& regs) { + std::string ret = ""; + + switch (api) { + case API::GL: ret += "#version 410 core"; break; + case API::GLES: ret += "#version 300 es"; break; + default: break; + } + + bool unimplementedFlag = false; + if (api == API::GLES) { + ret += R"( + #define USING_GLES 1 + + precision mediump int; + precision mediump float; + )"; + } + + // Input and output attributes + ret += R"( + in vec3 v_tangent; + in vec3 v_normal; + in vec3 v_bitangent; + in vec4 v_colour; + in vec3 v_texcoord0; + in vec2 v_texcoord1; + in vec3 v_view; + in vec2 v_texcoord2; + + out vec4 fragColor; + 
uniform sampler2D u_tex0; + uniform sampler2D u_tex1; + uniform sampler2D u_tex2; + // GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later +#ifndef USING_GLES + uniform sampler1DArray u_tex_lighting_lut; +#endif + )"; + + ret += uniformDefinition; + + // Emit main function for fragment shader + // When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour + ret += R"( + void main() { + vec4 combinerOutput = v_colour; + vec4 previousBuffer = vec4(0.0); + vec4 tevNextPreviousBuffer = tevBufferColor; + )"; + + ret += R"( + vec3 colorOp1 = vec3(0.0); + vec3 colorOp2 = vec3(0.0); + vec3 colorOp3 = vec3(0.0); + + float alphaOp1 = 0.0; + float alphaOp2 = 0.0; + float alphaOp3 = 0.0; + )"; + + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] + // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] + ret += R"( + float z_over_w = gl_FragCoord.z * 2.0f - 1.0f; + float depth = z_over_w * depthScale + depthOffset; + )"; + + if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) { + ret += "depth /= gl_FragCoord.w;\n"; + } + + ret += "gl_FragDepth = depth;\n"; + + textureConfig = regs[InternalRegs::TexUnitCfg]; + for (int i = 0; i < 6; i++) { + compileTEV(ret, i, regs); + } + + applyAlphaTest(ret, regs); + + ret += "fragColor = combinerOutput;\n"; + ret += "}"; // End of main function + ret += "\n\n\n\n\n\n\n"; + + return ret; +} + +void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) { + // Base address for each TEV stage's configuration + static constexpr std::array ioBases = { + InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source, + InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source, + }; + + const u32 ioBase = ioBases[stage]; + TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]); + + if 
(!tev.isPassthroughStage()) { + // Get color operands + shader += "colorOp1 = "; + getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage); + + shader += ";\ncolorOp2 = "; + getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage); + + shader += ";\ncolorOp3 = "; + getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage); + + shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp("; + getColorOperation(shader, tev.colorOp); + shader += ", vec3(0.0), vec3(1.0));\n"; + + if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) { + // Dot3 RGBA also writes to the alpha component so we don't need to do anything more + shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n"; + } else { + // Get alpha operands + shader += "alphaOp1 = "; + getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage); + + shader += ";\nalphaOp2 = "; + getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage); + + shader += ";\nalphaOp3 = "; + getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage); + + shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp("; + getAlphaOperation(shader, tev.alphaOp); + // Clamp the alpha value to [0.0, 1.0] + shader += ", 0.0, 1.0);\n"; + } + + shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) + + ".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) + + ".0, 0.0, 1.0));\n"; + } + + shader += "previousBuffer = tevNextPreviousBuffer;\n\n"; + + // Update the "next previous buffer" if necessary + const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + if (stage < 4) { + // Check whether to update rgb + if ((textureEnvUpdateBuffer & (0x100 << stage))) { + shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n"; + } + + // And whether to update alpha + if 
((textureEnvUpdateBuffer & (0x1000u << stage))) { + shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n"; + } + } +} + +void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) { + using OperandType = TexEnvConfig::ColorOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || + color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) { + shader += "vec3(1.0, 1.0, 1.0) - "; + } + + switch (color) { + case OperandType::SourceColor: + case OperandType::OneMinusSourceColor: + getSource(shader, source, index); + shader += ".rgb"; + break; + + case OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".rrr"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".ggg"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".bbb"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".aaa"; + break; + + default: + shader += "vec3(1.0, 1.0, 1.0)"; + Helpers::warn("FragmentGenerator: Invalid TEV color operand"); + break; + } +} + +void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) { + using OperandType = TexEnvConfig::AlphaOperand; + + // For inverting operands, add the 1.0 - x subtraction + if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue || + color == OperandType::OneMinusSourceAlpha) { + shader += "1.0 - "; + } + + switch (color) { + case 
OperandType::SourceRed: + case OperandType::OneMinusSourceRed: + getSource(shader, source, index); + shader += ".r"; + break; + + case OperandType::SourceGreen: + case OperandType::OneMinusSourceGreen: + getSource(shader, source, index); + shader += ".g"; + break; + + case OperandType::SourceBlue: + case OperandType::OneMinusSourceBlue: + getSource(shader, source, index); + shader += ".b"; + break; + + case OperandType::SourceAlpha: + case OperandType::OneMinusSourceAlpha: + getSource(shader, source, index); + shader += ".a"; + break; + + default: + shader += "1.0"; + Helpers::warn("FragmentGenerator: Invalid TEV alpha operand"); + break; + } +} + +void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) { + switch (source) { + case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break; + case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break; + case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break; + case TexEnvConfig::Source::Texture2: { + // If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2 + if (Helpers::getBit<13>(textureConfig)) { + shader += "texture(u_tex2, v_texcoord1)"; + } else { + shader += "texture(u_tex2, v_texcoord2)"; + } + break; + } + + case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break; + case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break; + case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break; + + // Lighting + case TexEnvConfig::Source::PrimaryFragmentColor: + case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break; + + default: + Helpers::warn("Unimplemented TEV source: %d", static_cast(source)); + shader += "vec4(1.0, 1.0, 1.0, 1.0)"; + break; + } +} + +void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) { +
switch (op) { + case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break; + case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break; + case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "(colorOp1 * colorOp2 + colorOp3)"; break; + case TexEnvConfig::Operation::Dot3RGB: + case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented color op"); + shader += "vec3(1.0)"; + break; + } +} + +void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) { + switch (op) { + case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break; + case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break; + case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break; + case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break; + case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break; + case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break; + + case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break; + case TexEnvConfig::Operation::MultiplyAdd: shader += "(alphaOp1 * alphaOp2 + alphaOp3)"; break; + default: + Helpers::warn("FragmentGenerator: Unimplemented alpha op"); + shader += "1.0"; + break; + } +} + +void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs&
regs) { + const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig]; + const auto function = static_cast(Helpers::getBits<4, 3>(alphaConfig)); + + // Alpha test disabled + if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) { + return; + } + + shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n"; + shader += "if ("; + switch (function) { + case CompareFunction::Never: shader += "true"; break; + case CompareFunction::Always: shader += "false"; break; + case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break; + case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break; + case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break; + case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break; + case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break; + case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break; + + default: + Helpers::warn("Unimplemented alpha test function"); + shader += "false"; + break; + } + + shader += ") { discard; }\n"; +} diff --git a/src/core/renderer_gl/gl_state.cpp b/src/core/renderer_gl/gl_state.cpp index d2eec0d5..3d1c0681 100644 --- a/src/core/renderer_gl/gl_state.cpp +++ b/src/core/renderer_gl/gl_state.cpp @@ -5,9 +5,20 @@ void GLStateManager::resetBlend() { logicOpEnabled = false; logicOp = GL_COPY; + blendEquationRGB = GL_FUNC_ADD; + blendEquationAlpha = GL_FUNC_ADD; + + blendFuncSourceRGB = GL_SRC_COLOR; + blendFuncDestRGB = GL_DST_COLOR; + blendFuncSourceAlpha = GL_SRC_ALPHA; + blendFuncDestAlpha = GL_DST_ALPHA; + OpenGL::disableBlend(); OpenGL::disableLogicOp(); OpenGL::setLogicOp(GL_COPY); + + glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha); + glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha); } void GLStateManager::resetClearing() { @@ -61,9 +72,12 @@ void GLStateManager::resetVAO() { 
glBindVertexArray(0); } -void GLStateManager::resetVBO() { +void GLStateManager::resetBuffers() { boundVBO = 0; + boundUBO = 0; + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, 0); } void GLStateManager::resetProgram() { @@ -79,7 +93,7 @@ void GLStateManager::reset() { resetDepth(); resetVAO(); - resetVBO(); + resetBuffers(); resetProgram(); resetScissor(); resetStencil(); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 2d29e682..b9a2c7ae 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,6 +5,7 @@ #include #include "PICA/float_types.hpp" +#include "PICA/pica_frag_uniforms.hpp" #include "PICA/gpu.hpp" #include "PICA/regs.hpp" #include "math_util.hpp" @@ -22,6 +23,11 @@ void RendererGL::reset() { colourBufferCache.reset(); textureCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // Init the colour/depth buffer settings to some random defaults on reset colourBufferLoc = 0; colourBufferFormat = PICA::ColorFmt::RGBA8; @@ -38,12 +44,16 @@ void RendererGL::reset() { oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthmapEnable = false; // Enable w buffering - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); gl.useProgram(oldProgram); // Switch to old GL program } + +#ifdef __ANDROID__ + fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL); +#endif } void RendererGL::initGraphicsContextInternal() { @@ -219,8 +229,8 @@ void RendererGL::setupBlending() { OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f); // Translate equations and funcs 
to their GL equivalents and set them - glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); - glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); + gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]); + gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]); } } @@ -272,10 +282,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) { glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]); } - -void RendererGL::setupTextureEnvState() { +void RendererGL::setupUbershaderTexEnv() { // TODO: Only update uniforms when the TEV config changed. Use an UBO potentially. - static constexpr std::array ioBases = { PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, @@ -297,11 +305,11 @@ void RendererGL::setupTextureEnvState() { textureEnvScaleRegs[i] = regs[ioBase + 4]; } - glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs); - glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs); - glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); - glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); - glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); + glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs); + glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs); + glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs); + glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs); + glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs); } void RendererGL::bindTexturesToSlots() { @@ -372,11 +380,17 @@ void 
RendererGL::drawVertices(PICA::PrimType primType, std::span v OpenGL::Triangle, }; + if (usingUbershader) { + gl.useProgram(triangleProgram); + } else { + OpenGL::Program& program = getSpecializedShader(); + gl.useProgram(program); + } + const auto primitiveTopology = primTypes[static_cast(primType)]; gl.disableScissor(); gl.bindVBO(vbo); gl.bindVAO(vao); - gl.useProgram(triangleProgram); gl.enableClipPlane(0); // Clipping plane 0 is always enabled if (regs[PICA::InternalRegs::ClipEnable] & 1) { @@ -397,33 +411,35 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span v static constexpr std::array depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL}; - const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); - const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); - const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; + // Update ubershader uniforms + if (usingUbershader) { + const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1; - // Update depth uniforms - if (oldDepthScale != depthScale) { - oldDepthScale = depthScale; - glUniform1f(depthScaleLoc, depthScale); + if (oldDepthScale != depthScale) { + oldDepthScale = depthScale; + glUniform1f(ubershaderData.depthScaleLoc, depthScale); + } + + if (oldDepthOffset != depthOffset) { + oldDepthOffset = depthOffset; + glUniform1f(ubershaderData.depthOffsetLoc, depthOffset); + } + + if (oldDepthmapEnable != depthMapEnable) { + oldDepthmapEnable = depthMapEnable; + glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable); + } + + // Upload PICA Registers as a single uniform. 
The shader needs access to the rasterizer registers (for depth, starting from index 0x48) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]); + setupUbershaderTexEnv(); } - if (oldDepthOffset != depthOffset) { - oldDepthOffset = depthOffset; - glUniform1f(depthOffsetLoc, depthOffset); - } - - if (oldDepthmapEnable != depthMapEnable) { - oldDepthmapEnable = depthMapEnable; - glUniform1i(depthmapEnableLoc, depthMapEnable); - } - - setupTextureEnvState(); bindTexturesToSlots(); - // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48) - // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates - glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]); - if (gpu.lightingLUTDirty) { updateLightingLUT(); } @@ -761,6 +777,104 @@ std::optional RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt return colourBufferCache.add(sampleBuffer); } +OpenGL::Program& RendererGL::getSpecializedShader() { + constexpr uint uboBlockBinding = 2; + + PICA::FragmentConfig fsConfig; + auto& outConfig = fsConfig.outConfig; + auto& texConfig = fsConfig.texConfig; + + auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig]; + auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig); + + outConfig.alphaTestFunction = (alphaTestConfig & 1) ?
static_cast(alphaTestFunction) : PICA::CompareFunction::Always; + outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1; + + texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg]; + texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer]; + + // Set up TEV stages + std::memcpy(&texConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32)); + std::memcpy(&texConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32)); + + CachedProgram& programEntry = shaderCache[fsConfig]; + OpenGL::Program& program = programEntry.program; + + if (!program.exists()) { + std::string vs = fragShaderGen.getVertexShader(regs); + std::string fs = fragShaderGen.generate(regs); + + OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex); + OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment); + program.create({vertShader, fragShader}); + gl.useProgram(program); + + // Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 + glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); + glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1); + glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3); + + // Allocate memory for the program UBO + glGenBuffers(1, &programEntry.uboBinding); + gl.bindUBO(programEntry.uboBinding); + glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW); + + // Set up the binding for our UBO.
Sadly we can't specify it in the shader like normal people, + // As it's an OpenGL 4.2 feature that MacOS doesn't support... + uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms"); + glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding); + glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding); + } + + // Upload uniform data to our shader's UBO + PICA::FragmentUniforms uniforms; + uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]); + + // Set up the texenv buffer color + const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor]; + uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f; + uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f; + uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f; + uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f; + + uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32(); + uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32(); + + if (regs[InternalRegs::ClipEnable] & 1) { + uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32(); + uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32(); + uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32(); + uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32(); + } + + // Set up the constant color for the 6 TEV stages + for (int i = 0; i < 6; i++) { + static constexpr std::array ioBases = { + PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source, + PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source, + }; + + auto& vec = 
uniforms.constantColors[i]; + u32 base = ioBases[i]; + u32 color = regs[base + 3]; + + vec[0] = float(color & 0xFF) / 255.0f; + vec[1] = float((color >> 8) & 0xFF) / 255.0f; + vec[2] = float((color >> 16) & 0xFF) / 255.0f; + vec[3] = float((color >> 24) & 0xFF) / 255.0f; + } + + gl.bindUBO(programEntry.uboBinding); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms); + + return program; +} + void RendererGL::screenshot(const std::string& name) { constexpr uint width = 400; constexpr uint height = 2 * 240; @@ -792,6 +906,11 @@ void RendererGL::deinitGraphicsContext() { depthBufferCache.reset(); colourBufferCache.reset(); + for (auto& shader : shaderCache) { + shader.second.program.free(); + } + shaderCache.clear(); + // All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext // TODO: Make it so that depth and colour buffers get written back to 3DS memory printf("RendererGL::DeinitGraphicsContext called\n"); @@ -814,24 +933,24 @@ void RendererGL::setUbershader(const std::string& shader) { initUbershader(triangleProgram); - glUniform1f(depthScaleLoc, oldDepthScale); - glUniform1f(depthOffsetLoc, oldDepthOffset); - glUniform1i(depthmapEnableLoc, oldDepthmapEnable); + glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale); + glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset); + glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable); } void RendererGL::initUbershader(OpenGL::Program& program) { gl.useProgram(program); - textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource"); - textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); - textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); - textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); - textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); + ubershaderData.textureEnvSourceLoc = 
OpenGL::uniformLocation(program, "u_textureEnvSource"); + ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand"); + ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner"); + ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor"); + ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale"); - depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); - depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); - depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); - picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); + ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale"); + ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset"); + ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable"); + ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs"); // Init sampler objects. 
Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3 glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0); diff --git a/src/libretro_core.cpp b/src/libretro_core.cpp index 3825d3ed..a6a1ff00 100644 --- a/src/libretro_core.cpp +++ b/src/libretro_core.cpp @@ -147,6 +147,7 @@ static void configInit() { static const retro_variable values[] = { {"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"}, {"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"}, + {"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"}, {"panda3ds_use_vsync", "Enable VSync; enabled|disabled"}, {"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"}, {"panda3ds_use_audio", "Enable audio; disabled|enabled"}, @@ -173,6 +174,7 @@ static void configUpdate() { config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true); config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false); config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false); + config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true); config.discordRpcEnabled = false; config.save(); diff --git a/third_party/opengl/opengl.hpp b/third_party/opengl/opengl.hpp index 9997e63b..828fb784 100644 --- a/third_party/opengl/opengl.hpp +++ b/third_party/opengl/opengl.hpp @@ -397,34 +397,41 @@ namespace OpenGL { }; struct Program { - GLuint m_handle = 0; + GLuint m_handle = 0; - bool create(std::initializer_list> shaders) { - m_handle = glCreateProgram(); - for (const auto& shader : shaders) { - glAttachShader(m_handle, shader.get().handle()); - } + bool create(std::initializer_list> shaders) { + m_handle = glCreateProgram(); + for (const auto& shader : shaders) { + glAttachShader(m_handle, shader.get().handle()); + } - glLinkProgram(m_handle); - GLint success; - glGetProgramiv(m_handle, GL_LINK_STATUS, &success); + 
glLinkProgram(m_handle); + GLint success; + glGetProgramiv(m_handle, GL_LINK_STATUS, &success); - if (!success) { - char buf[4096]; - glGetProgramInfoLog(m_handle, 4096, nullptr, buf); - fprintf(stderr, "Failed to link program\nError: %s\n", buf); - glDeleteProgram(m_handle); + if (!success) { + char buf[4096]; + glGetProgramInfoLog(m_handle, 4096, nullptr, buf); + fprintf(stderr, "Failed to link program\nError: %s\n", buf); + glDeleteProgram(m_handle); - m_handle = 0; - } + m_handle = 0; + } - return m_handle != 0; - } + return m_handle != 0; + } - GLuint handle() const { return m_handle; } - bool exists() const { return m_handle != 0; } - void use() const { glUseProgram(m_handle); } - }; + GLuint handle() const { return m_handle; } + bool exists() const { return m_handle != 0; } + void use() const { glUseProgram(m_handle); } + + void free() { + if (exists()) { + glDeleteProgram(m_handle); + m_handle = 0; + } + } + }; static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { glDispatchCompute(groupsX, groupsY, groupsZ);