This commit is contained in:
Paris Oplopoios 2024-08-10 23:37:29 +00:00 committed by GitHub
commit fce94fbb71
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 684 additions and 69 deletions

View file

@ -79,6 +79,7 @@ include_directories(third_party/stb)
include_directories(third_party/opengl)
include_directories(third_party/miniaudio)
include_directories(third_party/mio/single_include)
include_directories(third_party/lockfree)
add_compile_definitions(NOMINMAX) # Make windows.h not define min/max macros because third-party deps don't like it
add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything
@ -301,7 +302,7 @@ if(ENABLE_QT_GUI)
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_wgl.cpp)
else()
set(THIRD_PARTY_SOURCE_FILES ${THIRD_PARTY_SOURCE_FILES} third_party/duckstation/gl/context_egl.cpp third_party/duckstation/gl/context_egl_wayland.cpp
third_party/duckstation/gl/context_egl_x11.cpp third_party/duckstation/gl/context_glx.cpp third_party/duckstation/gl/x11_window.cpp)
third_party/duckstation/gl/context_egl_x11.cpp third_party/duckstation/gl/x11_window.cpp)
endif()
endif()
@ -325,14 +326,14 @@ if(ENABLE_OPENGL)
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
include/renderer_gl/gl_state.hpp
include/renderer_gl/gl_state.hpp include/renderer_gl/async_compiler.hpp
)
set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp
src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp
src/core/renderer_gl/gl_state.cpp src/host_shaders/opengl_display.frag
src/host_shaders/opengl_display.vert src/host_shaders/opengl_vertex_shader.vert
src/host_shaders/opengl_fragment_shader.frag
src/core/renderer_gl/gl_state.cpp src/core/renderer_gl/async_compiler.cpp
src/host_shaders/opengl_display.frag src/host_shaders/opengl_display.vert
src/host_shaders/opengl_vertex_shader.vert src/host_shaders/opengl_fragment_shader.frag
)
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES})

View file

@ -115,7 +115,7 @@ namespace PICA {
bumpSelector = Helpers::getBits<22, 2>(config0);
clampHighlights = Helpers::getBit<27>(config0);
bumpMode = Helpers::getBits<28, 2>(config0);
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
bumpRenorm = Helpers::getBit<30>(config0) ^ 1; // 0 = enable so flip it with xor
for (int i = 0; i < totalLightCount; i++) {
auto& light = lights[i];
@ -206,6 +206,27 @@ namespace PICA {
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
}
// Copy assignment operator.
// The BitField copy constructor is deleted, so the bitfield-backed members are transferred
// through their underlying raw storage instead of being assigned directly.
FragmentConfig& operator=(const FragmentConfig& config) {
	// Compile-time guard: the sizes of everything copied below must add up to the size of the whole struct.
	// If this fails you probably added a new field to the struct and forgot to update this copy assignment operator
	static_assert(
		sizeof(FragmentConfig) == sizeof(outConfig.raw) + sizeof(texConfig) + sizeof(fogConfig.raw) + sizeof(lighting.raw) +
									  7 * sizeof(LightingLUTConfig) + 8 * sizeof(Light)
	);

	// Output, texture and fog configuration
	outConfig.raw = config.outConfig.raw;
	texConfig = config.texConfig;
	fogConfig.raw = config.fogConfig.raw;

	// Lighting configuration: the top-level word first, then each LUT and each light
	lighting.raw = config.lighting.raw;

	for (int lut = 0; lut != 7; ++lut) {
		lighting.luts[lut].raw = config.lighting.luts[lut].raw;
	}

	for (int light = 0; light != 8; ++light) {
		lighting.lights[light].raw = config.lighting.lights[light].raw;
	}

	return *this;
}
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);

View file

@ -13,17 +13,17 @@ struct EmulatorConfig {
static constexpr bool shaderJitDefault = false;
#endif
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
// For now, use specialized shaders by default on MacOS as M1 drivers are buggy when using the ubershader, and on Android since mobile GPUs are
// horrible. On other platforms we default to ubershader + shadergen fallback for lights
#if defined(__ANDROID__) || defined(__APPLE__)
static constexpr bool ubershaderDefault = false;
static constexpr ShaderMode defaultShaderMode = ShaderMode::Specialized;
#else
static constexpr bool ubershaderDefault = true;
static constexpr ShaderMode defaultShaderMode = ShaderMode::Ubershader;
#endif
bool shaderJitEnabled = shaderJitDefault;
bool discordRpcEnabled = false;
bool useUbershaders = ubershaderDefault;
ShaderMode shaderMode = defaultShaderMode;
bool accurateShaderMul = false;
// Toggles whether to force shadergen when there's more than N lights active and we're using the ubershader, for better performance

View file

@ -55,7 +55,7 @@ class Emulator {
static constexpr u32 width = 400;
static constexpr u32 height = 240 * 2; // * 2 because 2 screens
ROMType romType = ROMType::None;
bool running = false; // Is the emulator running a game?
bool running = false; // Is the emulator running a game?
private:
#ifdef PANDA3DS_ENABLE_HTTP_SERVER
@ -109,7 +109,7 @@ class Emulator {
#ifdef PANDA3DS_FRONTEND_QT
// For passing the GL context from Qt to the renderer
void initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(nullptr); }
void initGraphicsContext(GL::Context* glContext) { gpu.initGraphicsContext(glContext); }
#else
void initGraphicsContext(SDL_Window* window) { gpu.initGraphicsContext(window); }
#endif

View file

@ -1,8 +1,8 @@
#pragma once
#include <array>
#include <optional>
#include <span>
#include <string>
#include <optional>
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
@ -20,6 +20,12 @@ enum class RendererType : s8 {
Software = 3,
};
// Selects which fragment shader strategy a renderer draws with.
// The per-mode notes describe what the OpenGL renderer does with each value.
enum class ShaderMode {
	// Draw with a shader generated specifically for the current fragment configuration
	Specialized = 0,
	// Draw with the general-purpose ubershader
	Ubershader = 1,
	// Draw with the ubershader while a specialized shader for the configuration is compiled on the
	// async compiler thread, then switch to the specialized shader once it is ready
	Hybrid = 2,
};
struct EmulatorConfig;
class GPU;
struct SDL_Window;
@ -56,6 +62,8 @@ class Renderer {
static constexpr u32 vertexBufferSize = 0x10000;
static std::optional<RendererType> typeFromString(std::string inString);
static const char* typeToString(RendererType rendererType);
static std::optional<ShaderMode> shaderModeFromString(std::string inString);
static const char* shaderModeToString(ShaderMode shaderMode);
virtual void reset() = 0;
virtual void display() = 0; // Display the 3DS screen contents to the window
@ -77,7 +85,7 @@ class Renderer {
virtual std::string getUbershader() { return ""; }
virtual void setUbershader(const std::string& shader) {}
virtual void setUbershaderSetting(bool value) {}
virtual void setShaderMode(ShaderMode shaderMode) {}
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
#ifdef PANDA3DS_FRONTEND_QT

View file

@ -0,0 +1,50 @@
#pragma once
#include <atomic>
#include <thread>
#include "PICA/pica_frag_config.hpp"
#include "lockfree/spsc/queue.hpp"
#include "opengl.hpp"
#include "renderer_gl/renderer_gl.hpp"
namespace PICA::ShaderGen {
class FragmentGenerator;
}
// Frontend-provided hooks used by AsyncCompilerThread to obtain a GL context for its worker thread.
// Each frontend supplies its own definitions of these functions.
namespace AsyncCompiler {
// Creates the context the compiler thread will use and returns it as an opaque handle.
// userdata is the frontend object passed to the AsyncCompilerThread constructor
// (the SDL frontend treats it as an SDL_Window*, the Qt frontend as a GL::Context*).
void* createContext(void* userdata);
// Makes a context returned by createContext current on the calling thread.
// Called at the start of the compiler thread with the same userdata that was given to createContext.
void makeCurrent(void* userdata, void* context);
// Destroys a context returned by createContext. Called by the compiler thread right before it exits.
void destroyContext(void* context);
} // namespace AsyncCompiler
// A single unit of work for the async compiler thread: which cache entry to fill in, and the
// fragment configuration to generate the shader from.
struct CompilingProgram {
// Cache entry whose OpenGL program is created by the compiler thread; its `compiling` flag is cleared when done.
// Not owned by this struct
CachedProgram* program;
// Fragment configuration for the shader. AsyncCompilerThread allocates this once per preallocated
// item and overwrites it with a copy of the caller's config on every push
PICA::FragmentConfig* config;
};
// Owns a background thread that compiles specialized fragment shaders so that the emulator thread
// does not have to stall on shader compilation.
// Work is handed over through a single-producer/single-consumer queue: the emulator thread pushes,
// the compiler thread pops.
struct AsyncCompilerThread {
	// fragShaderGen: generator used by the compiler thread to produce shader source. Held by reference.
	// userdata: opaque frontend object forwarded to AsyncCompiler::createContext and AsyncCompiler::makeCurrent
	explicit AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata);

	// Stops the compiler thread, joins it and releases the preallocated work items
	~AsyncCompilerThread();

	// Called from the emulator thread to queue a fragment configuration for compilation.
	// The config is copied, so the caller's object does not need to outlive the call.
	// This function does not report failure: if the queue is full it logs a warning and spins
	// until the compiler thread has made room
	void PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram);

	// Called from the emulator thread. Blocks until the compiler thread has acknowledged the
	// request by clearing the hasWork flag
	void Finish();

  private:
	PICA::ShaderGen::FragmentGenerator& fragShaderGen;
	// Vertex shader that every program compiled on this thread is linked against
	OpenGL::Shader defaultShadergenVs;

	// Our lockfree queue only allows for trivial types, so we preallocate enough structs
	// to avoid dynamic allocation on each push. Items are handed out round-robin
	int preallocatedProgramsIndex = 0;
	static constexpr int preallocatedProgramsSize = 256;
	std::array<CompilingProgram*, preallocatedProgramsSize> preallocatedPrograms{};
	lockfree::spsc::Queue<CompilingProgram*, preallocatedProgramsSize - 1> programQueue;

	// Both flags get an explicit initial value so that they are never read or written in an
	// uninitialized state. The constructor raises `running` before starting the thread
	std::atomic_bool running = false;  // Cleared by the destructor to make the compiler thread exit
	std::atomic_bool hasWork = false;  // Raised by Finish(), cleared by the compiler thread
	std::thread thread;
};

View file

@ -12,6 +12,7 @@
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
#include "config.hpp"
#include "gl_state.hpp"
#include "helpers.hpp"
#include "logger.hpp"
@ -22,6 +23,15 @@
// More circular dependencies!
class GPU;
// Entry of the specialized fragment shader cache
struct CachedProgram {
	// The linked OpenGL program for this fragment configuration
	OpenGL::Program program;
	// True while the async compiler thread still has to build `program`
	std::atomic_bool compiling{false};
	// True until the renderer has set up the program's sampler uniforms and uniform block binding
	bool needsInitialization{true};
};
struct AsyncCompilerThread;
class RendererGL final : public Renderer {
GLStateManager gl = {};
@ -30,9 +40,9 @@ class RendererGL final : public Renderer {
OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo;
bool enableUbershader = true;
ShaderMode shaderMode = EmulatorConfig::defaultShaderMode;
// Data
// Data
struct {
// TEV configuration uniform locations
GLint textureEnvSourceLoc = -1;
@ -71,12 +81,10 @@ class RendererGL final : public Renderer {
OpenGL::Shader defaultShadergenVs;
GLuint shadergenFragmentUBO;
// Cached recompiled fragment shader
struct CachedProgram {
OpenGL::Program program;
};
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;
AsyncCompilerThread* asyncCompiler = nullptr;
OpenGL::Framebuffer getColourFBO();
OpenGL::Texture getTexture(Texture& tex);
OpenGL::Program& getSpecializedShader();
@ -104,15 +112,15 @@ class RendererGL final : public Renderer {
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
void deinitGraphicsContext() override;
virtual bool supportsShaderReload() override { return true; }
virtual std::string getUbershader() override;
virtual void setUbershader(const std::string& shader) override;
virtual void setUbershaderSetting(bool value) override { enableUbershader = value; }
virtual void setShaderMode(ShaderMode mode) override { shaderMode = mode; }
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
// Note: The caller is responsible for deleting the currently bound FBO before calling this
@ -122,7 +130,7 @@ class RendererGL final : public Renderer {
void initUbershader(OpenGL::Program& program);
#ifdef PANDA3DS_FRONTEND_QT
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
void initGraphicsContext(GL::Context* context) override;
#endif
// Take a screenshot of the screen and store it in a file

View file

@ -60,9 +60,18 @@ void EmulatorConfig::load() {
rendererType = RendererType::OpenGL;
}
auto shaderModeName = toml::find_or<std::string>(gpu, "ShaderMode", Renderer::shaderModeToString(defaultShaderMode));
auto configShaderMode = Renderer::shaderModeFromString(shaderModeName);
if (configShaderMode.has_value()) {
shaderMode = configShaderMode.value();
} else {
Helpers::warn("Invalid shader mode specified: %s\n", shaderModeName.c_str());
shaderMode = defaultShaderMode;
}
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", shaderJitDefault);
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
@ -127,12 +136,12 @@ void EmulatorConfig::save() {
data["General"]["EnableDiscordRPC"] = discordRpcEnabled;
data["General"]["UsePortableBuild"] = usePortableBuild;
data["General"]["DefaultRomPath"] = defaultRomPath.string();
data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
data["GPU"]["EnableVSync"] = vsyncEnabled;
data["GPU"]["AccurateShaderMultiplication"] = accurateShaderMul;
data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ShaderMode"] = std::string(Renderer::shaderModeToString(shaderMode));
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;

View file

@ -117,7 +117,7 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->setShaderMode(config.shaderMode);
renderer->reset();
}
@ -365,7 +365,7 @@ PICA::Vertex GPU::getImmediateModeVertex() {
// Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute
shaderUnit.vs.run();
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {

View file

@ -0,0 +1,72 @@
#include "renderer_gl/async_compiler.hpp"
// Preallocates the work items, creates a GL context for the compiler thread and starts the thread.
// fragShaderGen: generator used to produce the default vertex shader and the fragment shader of every queued config.
// userdata: opaque frontend object, forwarded untouched to AsyncCompiler::createContext/makeCurrent.
AsyncCompilerThread::AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata) : fragShaderGen(fragShaderGen) {
	preallocatedProgramsIndex = 0;
	running.store(true);
	// Nobody is waiting in Finish() yet. Give the flag a defined value before the thread starts
	hasWork.store(false);

	for (int i = 0; i < preallocatedProgramsSize; i++) {
		preallocatedPrograms[i] = new CompilingProgram();
		preallocatedPrograms[i]->config = new PICA::FragmentConfig({});
	}

	// The context needs to be created on the main thread so that we can make it shared with that
	// thread's context
	void* context = AsyncCompiler::createContext(userdata);

	thread = std::thread([this, userdata, context]() {
		AsyncCompiler::makeCurrent(userdata, context);
		printf("Async compiler started, GL version: %s\n", glGetString(GL_VERSION));

		std::string defaultShadergenVSSource = this->fragShaderGen.getDefaultVertexShader();
		defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);

		while (running.load()) {
			// Sample the flush request BEFORE draining the queue. Finish() raises hasWork only after all of
			// its pushes, so if we observe the request here the drain below is guaranteed to cover them.
			// Clearing the flag unconditionally after the drain could acknowledge a request that arrived
			// after the queue was last seen empty, letting Finish() return while items are still queued
			const bool flushRequested = hasWork.load();

			CompilingProgram* item;
			while (programQueue.Pop(item)) {
				OpenGL::Program& glProgram = item->program->program;
				std::string fs = this->fragShaderGen.generate(*item->config);
				OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
				glProgram.create({defaultShadergenVs, fragShader});
				fragShader.free();

				// The program was created in this thread's context but is consumed from the main context.
				// Make sure the GL commands that built it have completed before publishing it as ready
				glFinish();
				item->program->compiling.store(false);
			}

			if (flushRequested) {
				hasWork.store(false);
			}
			std::this_thread::sleep_for(std::chrono::milliseconds(10));
		}

		// Release the vertex shader created above while the context is still alive
		defaultShadergenVs.free();
		AsyncCompiler::destroyContext(context);
	});
}
// Asks the compiler thread to stop, waits for it to exit, then frees every preallocated work item
AsyncCompilerThread::~AsyncCompilerThread() {
	running.store(false);
	thread.join();

	// The thread is gone, so nothing references the work items anymore
	for (CompilingProgram* entry : preallocatedPrograms) {
		delete entry->config;
		delete entry;
	}
}
// Queues `config` for compilation into `cachedProgram`.
// Takes the next preallocated work item (round-robin), fills it in with a copy of the config and
// pushes it to the compiler thread. If the queue is full, warns once and retries until the push succeeds.
void AsyncCompilerThread::PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram) {
	CompilingProgram* slot = preallocatedPrograms[preallocatedProgramsIndex];
	preallocatedProgramsIndex = (preallocatedProgramsIndex + 1) % preallocatedProgramsSize;

	slot->program = cachedProgram;
	*slot->config = config;

	// Common case: there is room in the queue
	if (programQueue.Push(slot)) {
		return;
	}

	Helpers::warn("AsyncCompilerThread: Queue full, spinning");
	while (!programQueue.Push(slot)) {
	}
}
// Raises the hasWork flag and blocks the calling thread until the compiler thread clears it again,
// which it does after it has drained its queue.
void AsyncCompilerThread::Finish() {
	hasWork.store(true);

	// Wait for the compiler thread to finish any outstanding work.
	// The compiler thread sleeps between polls, so give up our timeslice instead of
	// burning a core in a tight loop while we wait for it
	while (hasWork.load()) {
		std::this_thread::yield();
	}
}

View file

@ -9,6 +9,7 @@
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/regs.hpp"
#include "renderer_gl/async_compiler.hpp"
#include "math_util.hpp"
CMRC_DECLARE(RendererGL);
@ -172,9 +173,23 @@ void RendererGL::initGraphicsContextInternal() {
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
}
// The OpenGL renderer only needs the GL context (For Qt frontend) or the SDL window (For SDL frontend) to create the
// async shader compiler in Hybrid shader mode. Everything else is set up by initGraphicsContextInternal for both
void RendererGL::initGraphicsContext([[maybe_unused]] SDL_Window* window) { initGraphicsContextInternal(); }
// SDL frontend entry point. The window is only needed by the async shader compiler (Hybrid mode),
// which uses it to create its own GL context; the rest of the setup is frontend-independent.
void RendererGL::initGraphicsContext(SDL_Window* context) {
	const bool needsAsyncCompiler = (shaderMode == ShaderMode::Hybrid);

	if (needsAsyncCompiler) {
		asyncCompiler = new AsyncCompilerThread(fragShaderGen, context);
	}

	initGraphicsContextInternal();
}
#ifdef PANDA3DS_FRONTEND_QT
// Qt frontend entry point. The GL context is only needed by the async shader compiler (Hybrid mode),
// which uses it to create its own GL context; the rest of the setup is frontend-independent.
void RendererGL::initGraphicsContext(GL::Context* context) {
	const bool needsAsyncCompiler = (shaderMode == ShaderMode::Hybrid);

	if (needsAsyncCompiler) {
		asyncCompiler = new AsyncCompilerThread(fragShaderGen, context);
	}

	initGraphicsContextInternal();
}
#endif
// Set up the OpenGL blending context to match the emulated PICA
void RendererGL::setupBlending() {
@ -414,23 +429,38 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = enableUbershader;
if (usingUbershader) {
if (shaderMode == ShaderMode::Ubershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using at least N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
} else {
gl.useProgram(triangleProgram);
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
} else if (shaderMode == ShaderMode::Specialized) {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
} else if (shaderMode == ShaderMode::Hybrid) {
PICA::FragmentConfig fsConfig(regs);
auto cachedProgram = shaderCache.find(fsConfig);
if (cachedProgram == shaderCache.end()) {
CachedProgram& program = shaderCache[fsConfig];
program.compiling.store(true);
asyncCompiler->PushFragmentConfig(fsConfig, &program);
gl.useProgram(triangleProgram);
} else if (cachedProgram->second.compiling.load(std::memory_order_relaxed)) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
} else {
Helpers::panic("Invalid shader mode");
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
@ -458,7 +488,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
// Update ubershader uniforms
if (usingUbershader) {
if (gl.currentProgram == triangleProgram.handle()) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
@ -844,14 +874,20 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
if (shaderMode == ShaderMode::Hybrid) [[unlikely]] {
Helpers::panic("Compiling shaders in main thread, this should never happen");
}
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({defaultShadergenVs, fragShader});
gl.useProgram(program);
fragShader.free();
}
if (programEntry.needsInitialization) {
gl.useProgram(program);
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
@ -862,6 +898,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
programEntry.needsInitialization = false;
}
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, shadergenFragmentUBO);
@ -979,6 +1016,11 @@ void RendererGL::screenshot(const std::string& name) {
}
void RendererGL::clearShaderCache() {
if (asyncCompiler != nullptr && shaderMode == ShaderMode::Hybrid) {
// May contain objects that are still in use, so we need to clear them first
asyncCompiler->Finish();
}
for (auto& shader : shaderCache) {
CachedProgram& cachedProgram = shader.second;
cachedProgram.program.free();

View file

@ -162,3 +162,12 @@ HC_API const char* getInfo(hydra::InfoType type) {
default: return nullptr;
}
}
// This frontend does not create a separate GL context for the async shader compiler,
// so the context hooks are no-ops and createContext hands back a null handle.
namespace AsyncCompiler {
	void* createContext([[maybe_unused]] void* mainContext) { return nullptr; }

	void makeCurrent([[maybe_unused]] void* mainContext, [[maybe_unused]] void* context) {}

	void destroyContext([[maybe_unused]] void* context) {}
}  // namespace AsyncCompiler

View file

@ -4,10 +4,10 @@
#include <stdexcept>
#include "android_utils.hpp"
#include "emulator.hpp"
#include "renderer_gl/renderer_gl.hpp"
#include "services/hid.hpp"
#include "android_utils.hpp"
std::unique_ptr<Emulator> emulator = nullptr;
HIDService* hidService = nullptr;
@ -40,17 +40,17 @@ JNIEnv* jniEnv() {
extern "C" {
#define MAKE_SETTING(functionName, type, settingName) \
AlberFunction(void, functionName) (JNIEnv* env, jobject obj, type value) { emulator->getConfig().settingName = value; }
AlberFunction(void, functionName)(JNIEnv * env, jobject obj, type value) { emulator->getConfig().settingName = value; }
MAKE_SETTING(setShaderJitEnabled, jboolean, shaderJitEnabled)
#undef MAKE_SETTING
AlberFunction(void, Setup)(JNIEnv* env, jobject obj) {
env->GetJavaVM(&jvm);
env->GetJavaVM(&jvm);
alberClass = (jclass)env->NewGlobalRef((jclass)env->FindClass("com/panda3ds/pandroid/AlberDriver"));
alberClassOpenDocument = env->GetStaticMethodID(alberClass, "openDocument", "(Ljava/lang/String;Ljava/lang/String;)I");
alberClass = (jclass)env->NewGlobalRef((jclass)env->FindClass("com/panda3ds/pandroid/AlberDriver"));
alberClassOpenDocument = env->GetStaticMethodID(alberClass, "openDocument", "(Ljava/lang/String;Ljava/lang/String;)I");
}
AlberFunction(void, Pause)(JNIEnv* env, jobject obj) { emulator->pause(); }
@ -128,15 +128,15 @@ AlberFunction(jbyteArray, GetSmdh)(JNIEnv* env, jobject obj) {
#undef AlberFunction
int AndroidUtils::openDocument(const char* path, const char* perms) {
auto env = jniEnv();
auto env = jniEnv();
jstring uri = env->NewStringUTF(path);
jstring jmode = env->NewStringUTF(perms);
jstring uri = env->NewStringUTF(path);
jstring jmode = env->NewStringUTF(perms);
jint result = env->CallStaticIntMethod(alberClass, alberClassOpenDocument, uri, jmode);
jint result = env->CallStaticIntMethod(alberClass, alberClassOpenDocument, uri, jmode);
env->DeleteLocalRef(uri);
env->DeleteLocalRef(jmode);
env->DeleteLocalRef(uri);
env->DeleteLocalRef(jmode);
return (int)result;
}
return (int)result;
}

View file

@ -150,8 +150,8 @@ static void configInit() {
static const retro_variable values[] = {
{"panda3ds_use_shader_jit", "Enable shader JIT; enabled|disabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_ubershader", EmulatorConfig::defaultShaderMode == ShaderMode::Ubershader ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
@ -180,7 +180,9 @@ static void configUpdate() {
config.sdCardInserted = FetchVariableBool("panda3ds_use_virtual_sd", true);
config.sdWriteProtected = FetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = FetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = FetchVariableBool("panda3ds_use_ubershader", true);
config.shaderMode = FetchVariableBool("panda3ds_use_ubershader", EmulatorConfig::defaultShaderMode == ShaderMode::Ubershader)
? ShaderMode::Ubershader
: ShaderMode::Specialized;
config.forceShadergenForLights = FetchVariableBool("panda3ds_ubershader_lighting_override", true);
config.lightShadergenThreshold = std::clamp(std::stoi(FetchVariable("panda3ds_ubershader_lighting_override_threshold", "1")), 1, 8);
config.discordRpcEnabled = false;
@ -403,3 +405,13 @@ void retro_cheat_set(uint index, bool enabled, const char* code) {
void retro_cheat_reset() {
emulator->getCheats().reset();
}
// This frontend does not create a separate GL context for the async shader compiler,
// so the context hooks are no-ops and createContext hands back a null handle.
namespace AsyncCompiler {
	void* createContext([[maybe_unused]] void* mainContext) { return nullptr; }

	void makeCurrent([[maybe_unused]] void* mainContext, [[maybe_unused]] void* context) {}

	void destroyContext([[maybe_unused]] void* context) {}
}  // namespace AsyncCompiler

View file

@ -6,8 +6,10 @@
#include <cmath>
#include <cstdio>
#include <fstream>
#include <memory>
#include "cheats.hpp"
#include "gl/context.h"
#include "input_mappings.hpp"
#include "services/dsp.hpp"
@ -601,3 +603,32 @@ void MainWindow::pollControllers() {
}
}
}
namespace AsyncCompiler {
void* createContext(void* mainContext) {
GL::Context* glContext = (GL::Context*)mainContext;
// Unlike the SDL function, this doesn't make it current so we don't
// need to call MakeCurrent on the mainContext
WindowInfo wi = glContext->GetWindowInfo();
wi.type = WindowInfo::Type::Surfaceless;
std::unique_ptr<GL::Context> iLoveBeingForcedToUseRAII = glContext->CreateSharedContext(wi);
if (!iLoveBeingForcedToUseRAII) {
Helpers::panic("Failed to create shared GL context");
}
return iLoveBeingForcedToUseRAII.release();
}
void makeCurrent(void* unused, void* context) {
GL::Context* glContext = (GL::Context*)context;
glContext->MakeCurrent();
}
void destroyContext(void* context) {
GL::Context* glContext = (GL::Context*)context;
delete glContext;
}
} // namespace AsyncCompiler

View file

@ -35,6 +35,11 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 3 : 1);
if (config.shaderMode == ShaderMode::Hybrid) {
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
}
window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
if (window == nullptr) {
@ -46,6 +51,16 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
}
if (config.shaderMode == ShaderMode::Hybrid) {
// As per the wiki you should check the value after creating the context
// as it can differ from the requested value
int sharingEnabled;
SDL_GL_GetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, &sharingEnabled);
if (!sharingEnabled) {
Helpers::panic("OpenGL context sharing not enabled");
}
}
if (!gladLoadGLLoader(reinterpret_cast<GLADloadproc>(SDL_GL_GetProcAddress))) {
Helpers::panic("OpenGL init failed");
}
@ -342,3 +357,28 @@ void FrontendSDL::run() {
SDL_GL_SwapWindow(window);
}
}
namespace AsyncCompiler {
void* createContext(void* window) {
SDL_Window* sdlWindow = static_cast<SDL_Window*>(window);
// SDL_GL_CreateContext also makes it the current context so we need to switch back after creation
SDL_GLContext currentContext = SDL_GL_GetCurrentContext();
SDL_GLContext glContext = SDL_GL_CreateContext(sdlWindow);
if (glContext == nullptr) {
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
}
SDL_GL_MakeCurrent(sdlWindow, currentContext);
return glContext;
}
void makeCurrent(void* window, void* context) {
SDL_GL_MakeCurrent((SDL_Window*)window, (SDL_GLContext)context);
}
void destroyContext(void* context) {
SDL_GL_DeleteContext(static_cast<SDL_GLContext>(context));
}
}

View file

@ -36,4 +36,34 @@ const char* Renderer::typeToString(RendererType rendererType) {
case RendererType::Software: return "software";
default: return "Invalid";
}
}
std::optional<ShaderMode> Renderer::shaderModeFromString(std::string inString) {
// Transform to lower-case to make the setting case-insensitive
std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); });
static const std::unordered_map<std::string, ShaderMode> map = {
{"specialized", ShaderMode::Specialized},
{"special", ShaderMode::Specialized},
{"ubershader", ShaderMode::Ubershader},
{"uber", ShaderMode::Ubershader},
{"hybrid", ShaderMode::Hybrid},
{"threaded", ShaderMode::Hybrid},
{"i hate opengl context creation", ShaderMode::Hybrid},
};
if (auto search = map.find(inString); search != map.end()) {
return search->second;
}
return std::nullopt;
}
// Returns the canonical lower-case name of a shader mode, or "Invalid" for a value outside the enum
const char* Renderer::shaderModeToString(ShaderMode shaderMode) {
	if (shaderMode == ShaderMode::Specialized) {
		return "specialized";
	}

	if (shaderMode == ShaderMode::Ubershader) {
		return "ubershader";
	}

	if (shaderMode == ShaderMode::Hybrid) {
		return "hybrid";
	}

	return "Invalid";
}

View file

@ -19,6 +19,14 @@ public:
/// Prevents the function from being invoked when we go out of scope.
ALWAYS_INLINE void Cancel() { m_func.reset(); }
/// Invokes the stored function immediately, if there is one, and then clears it
/// so that it is not invoked again later. Does nothing when no function is stored.
ALWAYS_INLINE void Run() {
  if (m_func.has_value()) {
    auto& callback = m_func.value();
    callback();
    m_func.reset();
  }
}
/// Explicitly fires the function.
ALWAYS_INLINE void Invoke()
{

View file

@ -74,14 +74,7 @@ std::unique_ptr<GL::Context> Context::Create(const WindowInfo& wi, const Version
context = ContextAGL::Create(wi, versions_to_try, num_versions_to_try);
#else
if (wi.type == WindowInfo::Type::X11)
{
const char* use_egl_x11 = std::getenv("USE_EGL_X11");
if (use_egl_x11 && std::strcmp(use_egl_x11, "1") == 0)
context = ContextEGLX11::Create(wi, versions_to_try, num_versions_to_try);
else
context = ContextGLX::Create(wi, versions_to_try, num_versions_to_try);
}
context = ContextEGLX11::Create(wi, versions_to_try, num_versions_to_try);
#ifdef WAYLAND_ENABLED
if (wi.type == WindowInfo::Type::Wayland)
context = ContextEGLWayland::Create(wi, versions_to_try, num_versions_to_try);

View file

@ -20,6 +20,7 @@ std::unique_ptr<Context> ContextEGLX11::CreateSharedContext(const WindowInfo& wi
{
std::unique_ptr<ContextEGLX11> context = std::make_unique<ContextEGLX11>(wi);
context->m_display = m_display;
context->m_supports_surfaceless = m_supports_surfaceless;
if (!context->CreateContextAndSurface(m_version, m_context, false))
return nullptr;

View file

@ -19,6 +19,17 @@ static void* GetProcAddressCallback(const char* name)
}
namespace GL {
// Loads the GLAD WGL entry points for the given device context.
// Returns true on success; logs an error and returns false if loading fails.
static bool ReloadWGL(HDC dc)
{
  if (gladLoadWGL(dc))
    return true;

  Log_ErrorPrint("Loading GLAD WGL functions failed");
  return false;
}
ContextWGL::ContextWGL(const WindowInfo& wi) : Context(wi) {}
ContextWGL::~ContextWGL()
@ -149,8 +160,8 @@ std::unique_ptr<Context> ContextWGL::CreateSharedContext(const WindowInfo& wi)
}
else
{
Log_ErrorPrint("PBuffer not implemented");
return nullptr;
if (!context->CreatePBuffer())
return nullptr;
}
if (m_version.profile == Profile::NoProfile)
@ -305,6 +316,32 @@ bool ContextWGL::CreatePBuffer()
static constexpr const int pb_attribs[] = {0, 0};
HGLRC temp_rc = nullptr;
ScopedGuard temp_rc_guard([&temp_rc, hdc]() {
if (temp_rc)
{
wglMakeCurrent(hdc, nullptr);
wglDeleteContext(temp_rc);
}
});
if (!GLAD_WGL_ARB_pbuffer)
{
// we're probably running completely surfaceless... need a temporary context.
temp_rc = wglCreateContext(hdc);
if (!temp_rc || !wglMakeCurrent(hdc, temp_rc))
{
Log_ErrorPrint("Failed to create temporary context to load WGL for pbuffer.");
return false;
}
if (!ReloadWGL(hdc) || !GLAD_WGL_ARB_pbuffer)
{
Log_ErrorPrint("Missing WGL_ARB_pbuffer");
return false;
}
}
AssertMsg(m_pixel_format.has_value(), "Has pixel format for pbuffer");
HPBUFFERARB pbuffer = wglCreatePbufferARB(hdc, m_pixel_format.value(), 1, 1, pb_attribs);
if (!pbuffer)
@ -326,6 +363,7 @@ bool ContextWGL::CreatePBuffer()
m_dummy_dc = hdc;
m_pbuffer = pbuffer;
temp_rc_guard.Run();
pbuffer_guard.Cancel();
hdc_guard.Cancel();
hwnd_guard.Cancel();

21
third_party/lockfree/LICENSE vendored Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Djordje Nedic
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

110
third_party/lockfree/lockfree/spsc/queue.hpp vendored Executable file
View file

@ -0,0 +1,110 @@
/**************************************************************
* @file queue.hpp
* @brief A queue implementation written in standard c++11
* suitable for both low-end microcontrollers all the way
* to HPC machines. Lock-free for single consumer single
* producer scenarios.
**************************************************************/
/**************************************************************
* Copyright (c) 2023 Djordje Nedic
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software
* without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to
* whom the Software is furnished to do so, subject to the
* following conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file is part of lockfree
*
* Author: Djordje Nedic <nedic.djordje2@gmail.com>
* Version: v2.0.9
**************************************************************/
/************************** INCLUDE ***************************/
#ifndef LOCKFREE_QUEUE_HPP
#define LOCKFREE_QUEUE_HPP
#include <atomic>
#include <cstddef>
#include <type_traits>
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
#include <optional>
#endif
namespace lockfree {
namespace spsc {
/*************************** TYPES ****************************/
/**
 * @brief Fixed-size queue for one producer thread and one consumer thread.
 *
 * Storage is an in-object array of @p size elements indexed by two atomic
 * counters. The implementation treats "next write index == read index" as
 * full, so one slot always stays unused and at most size - 1 elements are
 * held at a time.
 *
 * @tparam T    Element type; must be trivial.
 * @tparam size Number of slots in the backing array; must be greater than 2.
 */
template <typename T, size_t size>
class Queue {
    static_assert(std::is_trivial<T>::value, "The type T must be trivial");
    static_assert(size > 2, "Buffer size must be bigger than 2");

  public:
    /** @brief Creates an empty queue. */
    Queue();

    /**
     * @brief Appends a copy of @p element to the queue.
     * Producer thread only.
     * @param[in] element Value to store
     * @retval true if the element was stored, false if the queue was full
     */
    bool Push(const T &element);

    /**
     * @brief Takes the oldest element out of the queue.
     * Consumer thread only.
     * @param[out] element Receives the removed value on success
     * @retval true if an element was removed, false if the queue was empty
     */
    bool Pop(T &element);

#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
    /**
     * @brief Takes the oldest element out of the queue.
     * Consumer thread only.
     * @retval The removed element, or an empty optional if there was none
     */
    std::optional<T> PopOptional();
#endif

  private:
    /** Backing storage for the elements */
    T _data[size];

#if LOCKFREE_CACHE_COHERENT
    /* Each index gets its own cache-line-aligned slot in this configuration */
    alignas(LOCKFREE_CACHELINE_LENGTH) std::atomic<size_t> _r; /**< Read index */
    alignas(LOCKFREE_CACHELINE_LENGTH) std::atomic<size_t> _w; /**< Write index */
#else
    std::atomic<size_t> _r; /**< Read index */
    std::atomic<size_t> _w; /**< Write index */
#endif
};
} /* namespace spsc */
} /* namespace lockfree */
/************************** INCLUDE ***************************/
/* Include the implementation */
#include "queue_impl.hpp"
#endif /* LOCKFREE_QUEUE_HPP */

View file

@ -0,0 +1,111 @@
/**************************************************************
* @file queue_impl.hpp
* @brief A queue implementation written in standard c++11
* suitable for both low-end microcontrollers all the way
* to HPC machines. Lock-free for single consumer single
* producer scenarios.
**************************************************************/
/**************************************************************
* Copyright (c) 2023 Djordje Nedic
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software
* without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to
* whom the Software is furnished to do so, subject to the
* following conditions:
*
* The above copyright notice and this permission notice shall
* be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* This file is part of lockfree
*
* Author: Djordje Nedic <nedic.djordje2@gmail.com>
* Version: v2.0.9
**************************************************************/
namespace lockfree {
namespace spsc {
/********************** PUBLIC METHODS ************************/
/* Both indices start at zero; equal read and write indices mean "empty". */
template <typename T, size_t size>
Queue<T, size>::Queue()
    : _r(0U), _w(0U)
{
}
/*
 * Producer side. Copies the element into the slot at the current write index
 * and then publishes the advanced index. Returns false, leaving the queue
 * untouched, when advancing the write index would make it equal to the read
 * index (queue full).
 */
template <typename T, size_t size>
bool Queue<T, size>::Push(const T &element) {
    const size_t write_pos = _w.load(std::memory_order_relaxed);

    /*
      Fullness is judged on the *next* write index (with wrap-around), so the
      case where the read index has wrapped while the write index sits on the
      last slot is not missed.
    */
    const size_t next_write = (write_pos + 1 == size) ? 0U : write_pos + 1;

    if (next_write != _r.load(std::memory_order_acquire)) {
        /* Write the payload first, then publish the new write index */
        _data[write_pos] = element;
        _w.store(next_write, std::memory_order_release);
        return true;
    }

    /* Queue is full */
    return false;
}
/*
 * Consumer side. Copies the element at the current read index into
 * `element` and then publishes the advanced read index. Returns false,
 * leaving `element` untouched, when the read and write indices are equal
 * (queue empty).
 */
template <typename T, size_t size>
bool Queue<T, size>::Pop(T &element) {
    const size_t read_pos = _r.load(std::memory_order_relaxed);

    /* Nothing to take when the read index has caught up with the write index */
    if (read_pos == _w.load(std::memory_order_acquire)) {
        return false;
    }

    /* Copy the payload out before releasing the slot */
    element = _data[read_pos];

    /* Advance with wrap-around and publish the new read index */
    const size_t next_read = (read_pos + 1 == size) ? 0U : read_pos + 1;
    _r.store(next_read, std::memory_order_release);

    return true;
}
/********************* std::optional API **********************/
#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
/*
 * Convenience wrapper around Pop(): yields the removed element, or an empty
 * optional when Pop() reports that nothing could be removed.
 */
template <typename T, size_t size>
std::optional<T> Queue<T, size>::PopOptional() {
    T popped;
    if (!Pop(popped)) {
        return std::nullopt;
    }
    return popped;
}
#endif
} /* namespace spsc */
} /* namespace lockfree */