Merge pull request #514 from wheremyfoodat/specialized-shaders-2

WIP fragment pipeline GLSL recompiler
This commit is contained in:
wheremyfoodat 2024-07-16 23:54:28 +00:00 committed by GitHub
commit 6bef278f43
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 987 additions and 91 deletions

View file

@ -198,7 +198,7 @@ set(SERVICE_SOURCE_FILES src/core/services/service_manager.cpp src/core/services
set(PICA_SOURCE_FILES src/core/PICA/gpu.cpp src/core/PICA/regs.cpp src/core/PICA/shader_unit.cpp
src/core/PICA/shader_interpreter.cpp src/core/PICA/dynapica/shader_rec.cpp
src/core/PICA/dynapica/shader_rec_emitter_x64.cpp src/core/PICA/pica_hash.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp
src/core/PICA/dynapica/shader_rec_emitter_arm64.cpp src/core/PICA/shader_gen_glsl.cpp
)
set(LOADER_SOURCE_FILES src/core/loader/elf.cpp src/core/loader/ncsd.cpp src/core/loader/ncch.cpp src/core/loader/3dsx.cpp src/core/loader/lz77.cpp)
@ -245,10 +245,11 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
include/services/news_u.hpp include/applets/software_keyboard.hpp include/applets/applet_manager.hpp include/fs/archive_user_save_data.hpp
include/services/amiibo_device.hpp include/services/nfc_types.hpp include/swap.hpp include/services/csnd.hpp include/services/nwm_uds.hpp
include/fs/archive_system_save_data.hpp include/lua_manager.hpp include/memory_mapped_file.hpp include/hydra_icon.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp
include/PICA/dynapica/shader_rec_emitter_arm64.hpp include/scheduler.hpp include/applets/error_applet.hpp include/PICA/shader_gen.hpp
include/audio/dsp_core.hpp include/audio/null_core.hpp include/audio/teakra_core.hpp
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
include/PICA/pica_frag_uniforms.hpp
)
cmrc_add_resource_library(

View file

@ -0,0 +1,52 @@
#pragma once
#include <array>
#include <cstring>
#include <type_traits>
#include <unordered_map>
#include "PICA/pica_hash.hpp"
#include "PICA/regs.hpp"
#include "bitfield.hpp"
#include "helpers.hpp"
namespace PICA {
struct OutputConfig {
union {
u32 raw;
// Merge the enable + compare function into 1 field to avoid duplicate shaders
// enable == off means a CompareFunction of Always
BitField<0, 3, CompareFunction> alphaTestFunction;
BitField<4, 1, u32> depthMapEnable;
};
};
struct TextureConfig {
u32 texUnitConfig;
u32 texEnvUpdateBuffer;
// There's 6 TEV stages, and each one is configured via 5 word-sized registers
std::array<u32, 5 * 6> tevConfigs;
};
// Config used for identifying unique fragment pipeline configurations
struct FragmentConfig {
OutputConfig outConfig;
TextureConfig texConfig;
bool operator==(const FragmentConfig& config) const {
// Hash function and equality operator required by std::unordered_map
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
};
static_assert(
std::has_unique_object_representations<OutputConfig>() && std::has_unique_object_representations<TextureConfig>() &&
std::has_unique_object_representations<FragmentConfig>()
);
} // namespace PICA
// Override std::hash for our fragment config class
template <>
struct std::hash<PICA::FragmentConfig> {
std::size_t operator()(const PICA::FragmentConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
};

View file

@ -0,0 +1,21 @@
#pragma once
#include <array>
#include <type_traits>
#include "helpers.hpp"
namespace PICA {
struct FragmentUniforms {
using vec3 = std::array<float, 3>;
using vec4 = std::array<float, 4>;
static constexpr usize tevStageCount = 6;
s32 alphaReference;
float depthScale;
float depthOffset;
alignas(16) vec4 constantColors[tevStageCount];
alignas(16) vec4 tevBufferColor;
alignas(16) vec4 clipCoords;
};
} // namespace PICA

View file

@ -345,4 +345,131 @@ namespace PICA {
GeometryPrimitive = 3,
};
enum class CompareFunction : u32 {
Never = 0,
Always = 1,
Equal = 2,
NotEqual = 3,
Less = 4,
LessOrEqual = 5,
Greater = 6,
GreaterOrEqual = 7,
};
struct TexEnvConfig {
enum class Source : u8 {
PrimaryColor = 0x0,
PrimaryFragmentColor = 0x1,
SecondaryFragmentColor = 0x2,
Texture0 = 0x3,
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
// TODO: Inbetween values are unknown
PreviousBuffer = 0xD,
Constant = 0xE,
Previous = 0xF,
};
enum class ColorOperand : u8 {
SourceColor = 0x0,
OneMinusSourceColor = 0x1,
SourceAlpha = 0x2,
OneMinusSourceAlpha = 0x3,
SourceRed = 0x4,
OneMinusSourceRed = 0x5,
// TODO: Inbetween values are unknown
SourceGreen = 0x8,
OneMinusSourceGreen = 0x9,
// Inbetween values are unknown
SourceBlue = 0xC,
OneMinusSourceBlue = 0xD,
};
enum class AlphaOperand : u8 {
SourceAlpha = 0x0,
OneMinusSourceAlpha = 0x1,
SourceRed = 0x2,
OneMinusSourceRed = 0x3,
SourceGreen = 0x4,
OneMinusSourceGreen = 0x5,
SourceBlue = 0x6,
OneMinusSourceBlue = 0x7,
};
enum class Operation : u8 {
Replace = 0,
Modulate = 1,
Add = 2,
AddSigned = 3,
Lerp = 4,
Subtract = 5,
Dot3RGB = 6,
Dot3RGBA = 7,
MultiplyAdd = 8,
AddMultiply = 9,
};
// RGB sources
Source colorSource1, colorSource2, colorSource3;
// Alpha sources
Source alphaSource1, alphaSource2, alphaSource3;
// RGB operands
ColorOperand colorOperand1, colorOperand2, colorOperand3;
// Alpha operands
AlphaOperand alphaOperand1, alphaOperand2, alphaOperand3;
// Texture environment operations for this stage
Operation colorOp, alphaOp;
u32 constColor;
private:
// These are the only private members since their value doesn't actually reflect the scale
// So we make them public so we'll always use the appropriate member functions instead
u8 colorScale;
u8 alphaScale;
public:
// Create texture environment object from TEV registers
TexEnvConfig(u32 source, u32 operand, u32 combiner, u32 color, u32 scale) : constColor(color) {
colorSource1 = Helpers::getBits<0, 4, Source>(source);
colorSource2 = Helpers::getBits<4, 4, Source>(source);
colorSource3 = Helpers::getBits<8, 4, Source>(source);
alphaSource1 = Helpers::getBits<16, 4, Source>(source);
alphaSource2 = Helpers::getBits<20, 4, Source>(source);
alphaSource3 = Helpers::getBits<24, 4, Source>(source);
colorOperand1 = Helpers::getBits<0, 4, ColorOperand>(operand);
colorOperand2 = Helpers::getBits<4, 4, ColorOperand>(operand);
colorOperand3 = Helpers::getBits<8, 4, ColorOperand>(operand);
alphaOperand1 = Helpers::getBits<12, 3, AlphaOperand>(operand);
alphaOperand2 = Helpers::getBits<16, 3, AlphaOperand>(operand);
alphaOperand3 = Helpers::getBits<20, 3, AlphaOperand>(operand);
colorOp = Helpers::getBits<0, 4, Operation>(combiner);
alphaOp = Helpers::getBits<16, 4, Operation>(combiner);
colorScale = Helpers::getBits<0, 2>(scale);
alphaScale = Helpers::getBits<16, 2>(scale);
}
u32 getColorScale() { return (colorScale <= 2) ? (1 << colorScale) : 1; }
u32 getAlphaScale() { return (alphaScale <= 2) ? (1 << alphaScale) : 1; }
bool isPassthroughStage() {
// clang-format off
// Thank you to the Citra dev that wrote this out
return (
colorOp == Operation::Replace && alphaOp == Operation::Replace &&
colorSource1 == Source::Previous && alphaSource1 == Source::Previous &&
colorOperand1 == ColorOperand::SourceColor && alphaOperand1 == AlphaOperand::SourceAlpha &&
getColorScale() == 1 && getAlphaScale() == 1
);
// clang-format on
}
};
} // namespace PICA

View file

@ -0,0 +1,41 @@
#pragma once
#include <string>
#include "PICA/gpu.hpp"
#include "PICA/regs.hpp"
#include "helpers.hpp"
namespace PICA::ShaderGen {
// Graphics API this shader is targetting
enum class API { GL, GLES, Vulkan };
// Shading language to use (Only GLSL for the time being)
enum class Language { GLSL };
class FragmentGenerator {
using PICARegs = std::array<u32, 0x300>;
API api;
Language language;
void compileTEV(std::string& shader, int stage, const PICARegs& regs);
void getSource(std::string& shader, PICA::TexEnvConfig::Source source, int index);
void getColorOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::ColorOperand color, int index);
void getAlphaOperand(std::string& shader, PICA::TexEnvConfig::Source source, PICA::TexEnvConfig::AlphaOperand alpha, int index);
void getColorOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void getAlphaOperation(std::string& shader, PICA::TexEnvConfig::Operation op);
void applyAlphaTest(std::string& shader, const PICARegs& regs);
u32 textureConfig = 0;
public:
FragmentGenerator(API api, Language language) : api(api), language(language) {}
std::string generate(const PICARegs& regs);
std::string getVertexShader(const PICARegs& regs);
void setTarget(API api, Language language) {
this->api = api;
this->language = language;
}
};
}; // namespace PICA::ShaderGen

View file

@ -13,8 +13,11 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false;
#endif
static constexpr bool ubershaderDefault = true;
bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault;
bool accurateShaderMul = false;
RendererType rendererType = RendererType::OpenGL;
Audio::DSPCore::Type dspType = Audio::DSPCore::Type::Null;

View file

@ -74,6 +74,8 @@ class Renderer {
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
virtual void setUbershaderSetting(bool value) {}
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext(GL::Context* context) { Helpers::panic("Tried to initialize incompatible renderer with GL context"); }

View file

@ -40,9 +40,13 @@ struct GLStateManager {
GLuint boundVAO;
GLuint boundVBO;
GLuint currentProgram;
GLuint boundUBO;
GLenum depthFunc;
GLenum logicOp;
GLenum blendEquationRGB, blendEquationAlpha;
GLenum blendFuncSourceRGB, blendFuncSourceAlpha;
GLenum blendFuncDestRGB, blendFuncDestAlpha;
void reset();
void resetBlend();
@ -51,7 +55,7 @@ struct GLStateManager {
void resetColourMask();
void resetDepth();
void resetVAO();
void resetVBO();
void resetBuffers();
void resetProgram();
void resetScissor();
void resetStencil();
@ -183,6 +187,13 @@ struct GLStateManager {
}
}
void bindUBO(GLuint handle) {
if (boundUBO != handle) {
boundUBO = handle;
glBindBuffer(GL_UNIFORM_BUFFER, boundUBO);
}
}
void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }
@ -224,6 +235,41 @@ struct GLStateManager {
}
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
// Counterpart to glBlendEquationSeparate
void setBlendEquation(GLenum modeRGB, GLenum modeAlpha) {
if (blendEquationRGB != modeRGB || blendEquationAlpha != modeAlpha) {
blendEquationRGB = modeRGB;
blendEquationAlpha = modeAlpha;
glBlendEquationSeparate(modeRGB, modeAlpha);
}
}
// Counterpart to glBlendFuncSeparate
void setBlendFunc(GLenum sourceRGB, GLenum destRGB, GLenum sourceAlpha, GLenum destAlpha) {
if (blendFuncSourceRGB != sourceRGB || blendFuncDestRGB != destRGB || blendFuncSourceAlpha != sourceAlpha ||
blendFuncDestAlpha != destAlpha) {
blendFuncSourceRGB = sourceRGB;
blendFuncDestRGB = destRGB;
blendFuncSourceAlpha = sourceAlpha;
blendFuncDestAlpha = destAlpha;
glBlendFuncSeparate(sourceRGB, destRGB,sourceAlpha, destAlpha);
}
}
// Counterpart to regular glBlendEquation
void setBlendEquation(GLenum mode) { setBlendEquation(mode, mode); }
void setBlendEquation(OpenGL::BlendEquation modeRGB, OpenGL::BlendEquation modeAlpha) {
setBlendEquation(static_cast<GLenum>(modeRGB), static_cast<GLenum>(modeAlpha));
}
void setBlendEquation(OpenGL::BlendEquation mode) {
setBlendEquation(static_cast<GLenum>(mode));
}
};
static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");

View file

@ -1,11 +1,17 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <span>
#include <unordered_map>
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_config.hpp"
#include "PICA/pica_hash.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -24,21 +30,25 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool usingUbershader = true;
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
GLint textureEnvOperandLoc = -1;
GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Uniform of PICA registers
GLint picaRegLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
// Depth configuration uniform locations
GLint depthOffsetLoc = -1;
GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1;
} ubershaderData;
float oldDepthScale = -1.0;
float oldDepthOffset = 0.0;
@ -57,21 +67,31 @@ class RendererGL final : public Renderer {
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Texture blankTexture;
// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
uint uboBinding;
};
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
PICA::ShaderGen::FragmentGenerator fragShaderGen;
MAKE_LOG_FUNCTION(log, rendererLogger)
void setupBlending();
void setupStencilTest(bool stencilEnable);
void bindDepthBuffer();
void setupTextureEnvState();
void setupUbershaderTexEnv();
void bindTexturesToSlots();
void updateLightingLUT();
void initGraphicsContextInternal();
public:
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
: Renderer(gpu, internalRegs, externalRegs) {}
: Renderer(gpu, internalRegs, externalRegs), fragShaderGen(PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL) {}
~RendererGL() override;
void reset() override;
@ -87,6 +107,8 @@ class RendererGL final : public Renderer {
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { usingUbershader = value; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
// Note: The caller is responsible for deleting the currently bound FBO before calling this
@ -100,4 +122,4 @@ class RendererGL final : public Renderer {
// Take a screenshot of the screen and store it in a file
void screenshot(const std::string& name) override;
};
};

View file

@ -62,6 +62,7 @@ void EmulatorConfig::load() {
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
}
}
@ -123,10 +124,13 @@ void EmulatorConfig::save() {
data["General"]["EnableDiscordRPC"] = discordRpcEnabled;
data["General"]["UsePortableBuild"] = usePortableBuild;
data["General"]["DefaultRomPath"] = defaultRomPath.string();
data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders;
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));
data["Audio"]["EnableAudio"] = audioEnabled;

View file

@ -110,6 +110,7 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->reset();
}

View file

@ -0,0 +1,434 @@
#include "PICA/shader_gen.hpp"
using namespace PICA;
using namespace PICA::ShaderGen;
static constexpr const char* uniformDefinition = R"(
layout(std140) uniform FragmentUniforms {
int alphaReference;
float depthScale;
float depthOffset;
vec4 constantColors[6];
vec4 tevBufferColor;
vec4 clipCoords;
};
)";
std::string FragmentGenerator::getVertexShader(const PICARegs& regs) {
std::string ret = "";
switch (api) {
case API::GL: ret += "#version 410 core"; break;
case API::GLES: ret += "#version 300 es"; break;
default: break;
}
if (api == API::GLES) {
ret += R"(
#define USING_GLES 1
precision mediump int;
precision mediump float;
)";
}
ret += uniformDefinition;
ret += R"(
layout(location = 0) in vec4 a_coords;
layout(location = 1) in vec4 a_quaternion;
layout(location = 2) in vec4 a_vertexColour;
layout(location = 3) in vec2 a_texcoord0;
layout(location = 4) in vec2 a_texcoord1;
layout(location = 5) in float a_texcoord0_w;
layout(location = 6) in vec3 a_view;
layout(location = 7) in vec2 a_texcoord2;
out vec3 v_normal;
out vec3 v_tangent;
out vec3 v_bitangent;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;
#ifndef USING_GLES
out float gl_ClipDistance[2];
#endif
vec4 abgr8888ToVec4(uint abgr) {
const float scale = 1.0 / 255.0;
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
vec3 u = q.xyz;
float s = q.w;
return 2.0 * dot(u, v) * u + (s * s - dot(u, u)) * v + 2.0 * s * cross(u, v);
}
void main() {
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
v_colour = min(colourAbs, vec4(1.f));
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
#ifndef USING_GLES
gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
}
)";
return ret;
}
std::string FragmentGenerator::generate(const PICARegs& regs) {
std::string ret = "";
switch (api) {
case API::GL: ret += "#version 410 core"; break;
case API::GLES: ret += "#version 300 es"; break;
default: break;
}
bool unimplementedFlag = false;
if (api == API::GLES) {
ret += R"(
#define USING_GLES 1
precision mediump int;
precision mediump float;
)";
}
// Input and output attributes
ret += R"(
in vec3 v_tangent;
in vec3 v_normal;
in vec3 v_bitangent;
in vec4 v_colour;
in vec3 v_texcoord0;
in vec2 v_texcoord1;
in vec3 v_view;
in vec2 v_texcoord2;
out vec4 fragColor;
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
// GLES doesn't support sampler1DArray, as such we'll have to change how we handle lighting later
#ifndef USING_GLES
uniform sampler1DArray u_tex_lighting_lut;
#endif
)";
ret += uniformDefinition;
// Emit main function for fragment shader
// When not initialized, source 13 is set to vec4(0.0) and 15 is set to the vertex colour
ret += R"(
void main() {
vec4 combinerOutput = v_colour;
vec4 previousBuffer = vec4(0.0);
vec4 tevNextPreviousBuffer = tevBufferColor;
)";
ret += R"(
vec3 colorOp1 = vec3(0.0);
vec3 colorOp2 = vec3(0.0);
vec3 colorOp3 = vec3(0.0);
float alphaOp1 = 0.0;
float alphaOp2 = 0.0;
float alphaOp3 = 0.0;
)";
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
ret += R"(
float z_over_w = gl_FragCoord.z * 2.0f - 1.0f;
float depth = z_over_w * depthScale + depthOffset;
)";
if ((regs[InternalRegs::DepthmapEnable] & 1) == 0) {
ret += "depth /= gl_FragCoord.w;\n";
}
ret += "gl_FragDepth = depth;\n";
textureConfig = regs[InternalRegs::TexUnitCfg];
for (int i = 0; i < 6; i++) {
compileTEV(ret, i, regs);
}
applyAlphaTest(ret, regs);
ret += "fragColor = combinerOutput;\n";
ret += "}"; // End of main function
ret += "\n\n\n\n\n\n\n";
return ret;
}
void FragmentGenerator::compileTEV(std::string& shader, int stage, const PICARegs& regs) {
// Base address for each TEV stage's configuration
static constexpr std::array<u32, 6> ioBases = {
InternalRegs::TexEnv0Source, InternalRegs::TexEnv1Source, InternalRegs::TexEnv2Source,
InternalRegs::TexEnv3Source, InternalRegs::TexEnv4Source, InternalRegs::TexEnv5Source,
};
const u32 ioBase = ioBases[stage];
TexEnvConfig tev(regs[ioBase], regs[ioBase + 1], regs[ioBase + 2], regs[ioBase + 3], regs[ioBase + 4]);
if (!tev.isPassthroughStage()) {
// Get color operands
shader += "colorOp1 = ";
getColorOperand(shader, tev.colorSource1, tev.colorOperand1, stage);
shader += ";\ncolorOp2 = ";
getColorOperand(shader, tev.colorSource2, tev.colorOperand2, stage);
shader += ";\ncolorOp3 = ";
getColorOperand(shader, tev.colorSource3, tev.colorOperand3, stage);
shader += ";\nvec3 outputColor" + std::to_string(stage) + " = clamp(";
getColorOperation(shader, tev.colorOp);
shader += ", vec3(0.0), vec3(1.0));\n";
if (tev.colorOp == TexEnvConfig::Operation::Dot3RGBA) {
// Dot3 RGBA also writes to the alpha component so we don't need to do anything more
shader += "float outputAlpha" + std::to_string(stage) + " = outputColor" + std::to_string(stage) + ".x;\n";
} else {
// Get alpha operands
shader += "alphaOp1 = ";
getAlphaOperand(shader, tev.alphaSource1, tev.alphaOperand1, stage);
shader += ";\nalphaOp2 = ";
getAlphaOperand(shader, tev.alphaSource2, tev.alphaOperand2, stage);
shader += ";\nalphaOp3 = ";
getAlphaOperand(shader, tev.alphaSource3, tev.alphaOperand3, stage);
shader += ";\nfloat outputAlpha" + std::to_string(stage) + " = clamp(";
getAlphaOperation(shader, tev.alphaOp);
// Clamp the alpha value to [0.0, 1.0]
shader += ", 0.0, 1.0);\n";
}
shader += "combinerOutput = vec4(clamp(outputColor" + std::to_string(stage) + " * " + std::to_string(tev.getColorScale()) +
".0, vec3(0.0), vec3(1.0)), clamp(outputAlpha" + std::to_string(stage) + " * " + std::to_string(tev.getAlphaScale()) +
".0, 0.0, 1.0));\n";
}
shader += "previousBuffer = tevNextPreviousBuffer;\n\n";
// Update the "next previous buffer" if necessary
const u32 textureEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
if (stage < 4) {
// Check whether to update rgb
if ((textureEnvUpdateBuffer & (0x100 << stage))) {
shader += "tevNextPreviousBuffer.rgb = combinerOutput.rgb;\n";
}
// And whether to update alpha
if ((textureEnvUpdateBuffer & (0x1000u << stage))) {
shader += "tevNextPreviousBuffer.a = combinerOutput.a;\n";
}
}
}
void FragmentGenerator::getColorOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::ColorOperand color, int index) {
using OperandType = TexEnvConfig::ColorOperand;
// For inverting operands, add the 1.0 - x subtraction
if (color == OperandType::OneMinusSourceColor || color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen ||
color == OperandType::OneMinusSourceBlue || color == OperandType::OneMinusSourceAlpha) {
shader += "vec3(1.0, 1.0, 1.0) - ";
}
switch (color) {
case OperandType::SourceColor:
case OperandType::OneMinusSourceColor:
getSource(shader, source, index);
shader += ".rgb";
break;
case OperandType::SourceRed:
case OperandType::OneMinusSourceRed:
getSource(shader, source, index);
shader += ".rrr";
break;
case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen:
getSource(shader, source, index);
shader += ".ggg";
break;
case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue:
getSource(shader, source, index);
shader += ".bbb";
break;
case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index);
shader += ".aaa";
break;
default:
shader += "vec3(1.0, 1.0, 1.0)";
Helpers::warn("FragmentGenerator: Invalid TEV color operand");
break;
}
}
void FragmentGenerator::getAlphaOperand(std::string& shader, TexEnvConfig::Source source, TexEnvConfig::AlphaOperand color, int index) {
using OperandType = TexEnvConfig::AlphaOperand;
// For inverting operands, add the 1.0 - x subtraction
if (color == OperandType::OneMinusSourceRed || color == OperandType::OneMinusSourceGreen || color == OperandType::OneMinusSourceBlue ||
color == OperandType::OneMinusSourceAlpha) {
shader += "1.0 - ";
}
switch (color) {
case OperandType::SourceRed:
case OperandType::OneMinusSourceRed:
getSource(shader, source, index);
shader += ".r";
break;
case OperandType::SourceGreen:
case OperandType::OneMinusSourceGreen:
getSource(shader, source, index);
shader += ".g";
break;
case OperandType::SourceBlue:
case OperandType::OneMinusSourceBlue:
getSource(shader, source, index);
shader += ".b";
break;
case OperandType::SourceAlpha:
case OperandType::OneMinusSourceAlpha:
getSource(shader, source, index);
shader += ".a";
break;
default:
shader += "1.0";
Helpers::warn("FragmentGenerator: Invalid TEV color operand");
break;
}
}
void FragmentGenerator::getSource(std::string& shader, TexEnvConfig::Source source, int index) {
switch (source) {
case TexEnvConfig::Source::PrimaryColor: shader += "v_colour"; break;
case TexEnvConfig::Source::Texture0: shader += "texture(u_tex0, v_texcoord0.xy)"; break;
case TexEnvConfig::Source::Texture1: shader += "texture(u_tex1, v_texcoord1)"; break;
case TexEnvConfig::Source::Texture2: {
// If bit 13 in texture config is set then we use the texcoords for texture 1, otherwise for texture 2
if (Helpers::getBit<13>(textureConfig)) {
shader += "texture(u_tex2, v_texcoord1)";
} else {
shader += "texture(u_tex2, v_texcoord2)";
}
break;
}
case TexEnvConfig::Source::Previous: shader += "combinerOutput"; break;
case TexEnvConfig::Source::Constant: shader += "constantColors[" + std::to_string(index) + "]"; break;
case TexEnvConfig::Source::PreviousBuffer: shader += "previousBuffer"; break;
// Lighting
case TexEnvConfig::Source::PrimaryFragmentColor:
case TexEnvConfig::Source::SecondaryFragmentColor: shader += "vec4(1.0, 1.0, 1.0, 1.0)"; break;
default:
Helpers::warn("Unimplemented TEV source: %d", static_cast<int>(source));
shader += "vec4(1.0, 1.0, 1.0, 1.0)";
break;
}
}
void FragmentGenerator::getColorOperation(std::string& shader, TexEnvConfig::Operation op) {
switch (op) {
case TexEnvConfig::Operation::Replace: shader += "colorOp1"; break;
case TexEnvConfig::Operation::Add: shader += "colorOp1 + colorOp2"; break;
case TexEnvConfig::Operation::AddSigned: shader += "colorOp1 + colorOp2 - vec3(0.5)"; break;
case TexEnvConfig::Operation::Subtract: shader += "colorOp1 - colorOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "colorOp1 * colorOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "mix(colorOp2, colorOp1, colorOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(colorOp1 + colorOp2, vec3(1.0)) * colorOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(colorOp1, colorOp2, colorOp3)"; break;
case TexEnvConfig::Operation::Dot3RGB:
case TexEnvConfig::Operation::Dot3RGBA: shader += "vec3(4.0 * dot(colorOp1 - vec3(0.5), colorOp2 - vec3(0.5)))"; break;
default:
Helpers::warn("FragmentGenerator: Unimplemented color op");
shader += "vec3(1.0)";
break;
}
}
void FragmentGenerator::getAlphaOperation(std::string& shader, TexEnvConfig::Operation op) {
switch (op) {
case TexEnvConfig::Operation::Replace: shader += "alphaOp1"; break;
case TexEnvConfig::Operation::Add: shader += "alphaOp1 + alphaOp2"; break;
case TexEnvConfig::Operation::AddSigned: shader += "alphaOp1 + alphaOp2 - 0.5"; break;
case TexEnvConfig::Operation::Subtract: shader += "alphaOp1 - alphaOp2"; break;
case TexEnvConfig::Operation::Modulate: shader += "alphaOp1 * alphaOp2"; break;
case TexEnvConfig::Operation::Lerp: shader += "mix(alphaOp2, alphaOp1, alphaOp3)"; break;
case TexEnvConfig::Operation::AddMultiply: shader += "min(alphaOp1 + alphaOp2, 1.0) * alphaOp3"; break;
case TexEnvConfig::Operation::MultiplyAdd: shader += "fma(alphaOp1, alphaOp2, alphaOp3)"; break;
default:
Helpers::warn("FragmentGenerator: Unimplemented alpha op");
shader += "1.0";
break;
}
}
void FragmentGenerator::applyAlphaTest(std::string& shader, const PICARegs& regs) {
const u32 alphaConfig = regs[InternalRegs::AlphaTestConfig];
const auto function = static_cast<CompareFunction>(Helpers::getBits<4, 3>(alphaConfig));
// Alpha test disabled
if (Helpers::getBit<0>(alphaConfig) == 0 || function == CompareFunction::Always) {
return;
}
shader += "int testingAlpha = int(combinerOutput.a * 255.0);\n";
shader += "if (";
switch (function) {
case CompareFunction::Never: shader += "true"; break;
case CompareFunction::Always: shader += "false"; break;
case CompareFunction::Equal: shader += "testingAlpha != alphaReference"; break;
case CompareFunction::NotEqual: shader += "testingAlpha == alphaReference"; break;
case CompareFunction::Less: shader += "testingAlpha >= alphaReference"; break;
case CompareFunction::LessOrEqual: shader += "testingAlpha > alphaReference"; break;
case CompareFunction::Greater: shader += "testingAlpha <= alphaReference"; break;
case CompareFunction::GreaterOrEqual: shader += "testingAlpha < alphaReference"; break;
default:
Helpers::warn("Unimplemented alpha test function");
shader += "false";
break;
}
shader += ") { discard; }\n";
}

View file

@ -5,9 +5,20 @@ void GLStateManager::resetBlend() {
logicOpEnabled = false;
logicOp = GL_COPY;
blendEquationRGB = GL_FUNC_ADD;
blendEquationAlpha = GL_FUNC_ADD;
blendFuncSourceRGB = GL_SRC_COLOR;
blendFuncDestRGB = GL_DST_COLOR;
blendFuncSourceAlpha = GL_SRC_ALPHA;
blendFuncDestAlpha = GL_DST_ALPHA;
OpenGL::disableBlend();
OpenGL::disableLogicOp();
OpenGL::setLogicOp(GL_COPY);
glBlendEquationSeparate(blendEquationRGB, blendEquationAlpha);
glBlendFuncSeparate(blendFuncSourceRGB, blendFuncDestRGB, blendFuncSourceAlpha, blendFuncDestAlpha);
}
void GLStateManager::resetClearing() {
@ -61,9 +72,12 @@ void GLStateManager::resetVAO() {
glBindVertexArray(0);
}
void GLStateManager::resetVBO() {
void GLStateManager::resetBuffers() {
boundVBO = 0;
boundUBO = 0;
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
void GLStateManager::resetProgram() {
@ -79,7 +93,7 @@ void GLStateManager::reset() {
resetDepth();
resetVAO();
resetVBO();
resetBuffers();
resetProgram();
resetScissor();
resetStencil();

View file

@ -5,6 +5,7 @@
#include <cmrc/cmrc.hpp>
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/regs.hpp"
#include "math_util.hpp"
@ -22,6 +23,11 @@ void RendererGL::reset() {
colourBufferCache.reset();
textureCache.reset();
for (auto& shader : shaderCache) {
shader.second.program.free();
}
shaderCache.clear();
// Init the colour/depth buffer settings to some random defaults on reset
colourBufferLoc = 0;
colourBufferFormat = PICA::ColorFmt::RGBA8;
@ -38,12 +44,16 @@ void RendererGL::reset() {
oldDepthOffset = 0.0; // Default depth offset to 0
oldDepthmapEnable = false; // Enable w buffering
glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
gl.useProgram(oldProgram); // Switch to old GL program
}
#ifdef __ANDROID__
fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL);
#endif
}
void RendererGL::initGraphicsContextInternal() {
@ -219,8 +229,8 @@ void RendererGL::setupBlending() {
OpenGL::setBlendColor(float(r) / 255.f, float(g) / 255.f, float(b) / 255.f, float(a) / 255.f);
// Translate equations and funcs to their GL equivalents and set them
glBlendEquationSeparate(blendingEquations[rgbEquation], blendingEquations[alphaEquation]);
glBlendFuncSeparate(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]);
gl.setBlendEquation(blendingEquations[rgbEquation], blendingEquations[alphaEquation]);
gl.setBlendFunc(blendingFuncs[rgbSourceFunc], blendingFuncs[rgbDestFunc], blendingFuncs[alphaSourceFunc], blendingFuncs[alphaDestFunc]);
}
}
@ -272,10 +282,8 @@ void RendererGL::setupStencilTest(bool stencilEnable) {
glStencilOp(stencilOps[stencilFailOp], stencilOps[depthFailOp], stencilOps[passOp]);
}
void RendererGL::setupTextureEnvState() {
void RendererGL::setupUbershaderTexEnv() {
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
static constexpr std::array<u32, 6> ioBases = {
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
@ -297,11 +305,11 @@ void RendererGL::setupTextureEnvState() {
textureEnvScaleRegs[i] = regs[ioBase + 4];
}
glUniform1uiv(textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
glUniform1uiv(ubershaderData.textureEnvSourceLoc, 6, textureEnvSourceRegs);
glUniform1uiv(ubershaderData.textureEnvOperandLoc, 6, textureEnvOperandRegs);
glUniform1uiv(ubershaderData.textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(ubershaderData.textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(ubershaderData.textureEnvScaleLoc, 6, textureEnvScaleRegs);
}
void RendererGL::bindTexturesToSlots() {
@ -372,11 +380,17 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
gl.bindVBO(vbo);
gl.bindVAO(vao);
gl.useProgram(triangleProgram);
gl.enableClipPlane(0); // Clipping plane 0 is always enabled
if (regs[PICA::InternalRegs::ClipEnable] & 1) {
@ -397,33 +411,35 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// Update depth uniforms
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(depthScaleLoc, depthScale);
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(depthmapEnableLoc, depthMapEnable);
}
setupTextureEnvState();
bindTexturesToSlots();
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]);
if (gpu.lightingLUTDirty) {
updateLightingLUT();
}
@ -761,6 +777,104 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
return colourBufferCache.add(sampleBuffer);
}
OpenGL::Program& RendererGL::getSpecializedShader() {
constexpr uint uboBlockBinding = 2;
PICA::FragmentConfig fsConfig;
auto& outConfig = fsConfig.outConfig;
auto& texConfig = fsConfig.texConfig;
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
outConfig.alphaTestFunction = (alphaTestConfig & 1) ? static_cast<PICA::CompareFunction>(alphaTestFunction) : PICA::CompareFunction::Always;
outConfig.depthMapEnable = regs[InternalRegs::DepthmapEnable] & 1;
texConfig.texUnitConfig = regs[InternalRegs::TexUnitCfg];
texConfig.texEnvUpdateBuffer = regs[InternalRegs::TexEnvUpdateBuffer];
// Set up TEV stages
std::memcpy(&texConfig.tevConfigs[0 * 5], &regs[InternalRegs::TexEnv0Source], 5 * sizeof(u32));
std::memcpy(&texConfig.tevConfigs[1 * 5], &regs[InternalRegs::TexEnv1Source], 5 * sizeof(u32));
std::memcpy(&texConfig.tevConfigs[2 * 5], &regs[InternalRegs::TexEnv2Source], 5 * sizeof(u32));
std::memcpy(&texConfig.tevConfigs[3 * 5], &regs[InternalRegs::TexEnv3Source], 5 * sizeof(u32));
std::memcpy(&texConfig.tevConfigs[4 * 5], &regs[InternalRegs::TexEnv4Source], 5 * sizeof(u32));
std::memcpy(&texConfig.tevConfigs[5 * 5], &regs[InternalRegs::TexEnv5Source], 5 * sizeof(u32));
CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
std::string vs = fragShaderGen.getVertexShader(regs);
std::string fs = fragShaderGen.generate(regs);
OpenGL::Shader vertShader({vs.c_str(), vs.size()}, OpenGL::Vertex);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({vertShader, fragShader});
gl.useProgram(program);
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
// Allocate memory for the program UBO
glGenBuffers(1, &programEntry.uboBinding);
gl.bindUBO(programEntry.uboBinding);
glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW);
// Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people,
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, programEntry.uboBinding);
}
// Upload uniform data to our shader's UBO
PICA::FragmentUniforms uniforms;
uniforms.alphaReference = Helpers::getBits<8, 8>(regs[InternalRegs::AlphaTestConfig]);
// Set up the texenv buffer color
const u32 texEnvBufferColor = regs[InternalRegs::TexEnvBufferColor];
uniforms.tevBufferColor[0] = float(texEnvBufferColor & 0xFF) / 255.0f;
uniforms.tevBufferColor[1] = float((texEnvBufferColor >> 8) & 0xFF) / 255.0f;
uniforms.tevBufferColor[2] = float((texEnvBufferColor >> 16) & 0xFF) / 255.0f;
uniforms.tevBufferColor[3] = float((texEnvBufferColor >> 24) & 0xFF) / 255.0f;
uniforms.depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
uniforms.depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
if (regs[InternalRegs::ClipEnable] & 1) {
uniforms.clipCoords[0] = f24::fromRaw(regs[PICA::InternalRegs::ClipData0] & 0xffffff).toFloat32();
uniforms.clipCoords[1] = f24::fromRaw(regs[PICA::InternalRegs::ClipData1] & 0xffffff).toFloat32();
uniforms.clipCoords[2] = f24::fromRaw(regs[PICA::InternalRegs::ClipData2] & 0xffffff).toFloat32();
uniforms.clipCoords[3] = f24::fromRaw(regs[PICA::InternalRegs::ClipData3] & 0xffffff).toFloat32();
}
// Set up the constant color for the 6 TEV stages
for (int i = 0; i < 6; i++) {
static constexpr std::array<u32, 6> ioBases = {
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
};
auto& vec = uniforms.constantColors[i];
u32 base = ioBases[i];
u32 color = regs[base + 3];
vec[0] = float(color & 0xFF) / 255.0f;
vec[1] = float((color >> 8) & 0xFF) / 255.0f;
vec[2] = float((color >> 16) & 0xFF) / 255.0f;
vec[3] = float((color >> 24) & 0xFF) / 255.0f;
}
gl.bindUBO(programEntry.uboBinding);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(PICA::FragmentUniforms), &uniforms);
return program;
}
void RendererGL::screenshot(const std::string& name) {
constexpr uint width = 400;
constexpr uint height = 2 * 240;
@ -792,6 +906,11 @@ void RendererGL::deinitGraphicsContext() {
depthBufferCache.reset();
colourBufferCache.reset();
for (auto& shader : shaderCache) {
shader.second.program.free();
}
shaderCache.clear();
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
printf("RendererGL::DeinitGraphicsContext called\n");
@ -814,24 +933,24 @@ void RendererGL::setUbershader(const std::string& shader) {
initUbershader(triangleProgram);
glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
glUniform1f(ubershaderData.depthScaleLoc, oldDepthScale);
glUniform1f(ubershaderData.depthOffsetLoc, oldDepthOffset);
glUniform1i(ubershaderData.depthmapEnableLoc, oldDepthmapEnable);
}
void RendererGL::initUbershader(OpenGL::Program& program) {
gl.useProgram(program);
textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale");
ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource");
ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand");
ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner");
ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor");
ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale");
depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset");
depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable");
picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs");
ubershaderData.depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale");
ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset");
ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable");
ubershaderData.picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);

View file

@ -147,6 +147,7 @@ static void configInit() {
static const retro_variable values[] = {
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", "Use ubershaders (No stutter, maybe slower); enabled|disabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
@ -173,6 +174,7 @@ static void configUpdate() {
config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true);
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
config.discordRpcEnabled = false;
config.save();

View file

@ -397,34 +397,41 @@ namespace OpenGL {
};
struct Program {
GLuint m_handle = 0;
GLuint m_handle = 0;
bool create(std::initializer_list<std::reference_wrapper<Shader>> shaders) {
m_handle = glCreateProgram();
for (const auto& shader : shaders) {
glAttachShader(m_handle, shader.get().handle());
}
bool create(std::initializer_list<std::reference_wrapper<Shader>> shaders) {
m_handle = glCreateProgram();
for (const auto& shader : shaders) {
glAttachShader(m_handle, shader.get().handle());
}
glLinkProgram(m_handle);
GLint success;
glGetProgramiv(m_handle, GL_LINK_STATUS, &success);
glLinkProgram(m_handle);
GLint success;
glGetProgramiv(m_handle, GL_LINK_STATUS, &success);
if (!success) {
char buf[4096];
glGetProgramInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to link program\nError: %s\n", buf);
glDeleteProgram(m_handle);
if (!success) {
char buf[4096];
glGetProgramInfoLog(m_handle, 4096, nullptr, buf);
fprintf(stderr, "Failed to link program\nError: %s\n", buf);
glDeleteProgram(m_handle);
m_handle = 0;
}
m_handle = 0;
}
return m_handle != 0;
}
return m_handle != 0;
}
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void use() const { glUseProgram(m_handle); }
};
GLuint handle() const { return m_handle; }
bool exists() const { return m_handle != 0; }
void use() const { glUseProgram(m_handle); }
void free() {
if (exists()) {
glDeleteProgram(m_handle);
m_handle = 0;
}
}
};
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {
glDispatchCompute(groupsX, groupsY, groupsZ);