Actually implement the damn thing

This commit is contained in:
offtkp 2024-08-08 16:39:59 +03:00
parent c396b3f225
commit 67069a8826
7 changed files with 274 additions and 21 deletions

View file

@ -0,0 +1,83 @@
#include "renderer_gl/async_compiler.hpp"
// Sets up the pool of reusable CompilingProgram entries, creates a GL context for the worker via the
// frontend-provided AsyncCompiler hooks and starts the thread that compiles queued fragment shaders.
//
// fragShaderGen: generator used on the worker thread to produce the default vertex shader and the fragment shaders
// userdata: opaque frontend handle that is forwarded unchanged to AsyncCompiler::createContext/makeCurrent
AsyncCompilerThread::AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata)
	: fragShaderGen(fragShaderGen)
{
	preallocatedProgramsIndex = 0;
	running.store(true);

	// Allocate every queue entry (and its config) up front; PushFragmentConfig only recycles these
	for (int i = 0; i < preallocatedProgramsSize; i++)
	{
		preallocatedPrograms[i] = new CompilingProgram();
		preallocatedPrograms[i]->config = new PICA::FragmentConfig({});
	}

	// The context needs to be created on the main thread so that we can make it shared with that
	// thread's context
	void* context = AsyncCompiler::createContext(userdata);

	thread = std::thread([this, userdata, context]()
	{
		AsyncCompiler::makeCurrent(userdata, context);
		printf("Async compiler started, GL version: %s\n", glGetString(GL_VERSION));

		// The worker builds its own copy of the default vertex shader to link against
		std::string defaultShadergenVSSource = this->fragShaderGen.getDefaultVertexShader();
		defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);

		while (running.load())
		{
			CompilingProgram* item = nullptr;

			// Drain everything that is currently queued
			while (programQueue.Pop(item)) {
				OpenGL::Program& glProgram = item->program->program;
				std::string fs = this->fragShaderGen.generate(*item->config);
				OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);

				glProgram.create({defaultShadergenVs, fragShader});
				fragShader.free();

				// Changes to an object shared between contexts are only guaranteed to be visible to another
				// context once the commands that produced them have completed here. Wait for that before
				// telling the consumer that the program is ready to be used.
				glFinish();
				item->program->compiling.store(false);
			}

			// Signal anyone blocked in Finish() that a drain pass has completed, then give up the time slice
			hasWork.clear();
			std::this_thread::yield();
		}

		AsyncCompiler::destroyContext(context);
	});
}
// Stops the worker loop, waits for the thread to exit and then releases the preallocated queue entries.
AsyncCompilerThread::~AsyncCompilerThread()
{
	// The worker re-checks `running` at the top of its loop, so it exits after its current drain pass
	running.store(false);
	thread.join();

	// Safe to free the pool now that nothing else is touching it
	for (int slot = 0; slot < preallocatedProgramsSize; ++slot)
	{
		auto* entry = preallocatedPrograms[slot];
		delete entry->config;
		delete entry;
	}
}
// Queues a fragment configuration for compilation on the worker thread.
//
// config: copied into the next preallocated entry
// cachedProgram: cache entry the worker writes the linked program into and whose `compiling` flag it resets
//
// If the queue rejects the entry, a warning is emitted once and the call spins until the push succeeds.
void AsyncCompilerThread::PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram)
{
	// Take the next entry of the ring and advance the cursor, wrapping around at the end of the pool
	// NOTE(review): nothing here checks whether the worker is still using this entry - presumably the queue
	// capacity is small enough relative to the pool for that never to happen; confirm against the header
	CompilingProgram* slot = preallocatedPrograms[preallocatedProgramsIndex];
	preallocatedProgramsIndex = (preallocatedProgramsIndex + 1) % preallocatedProgramsSize;

	slot->program = cachedProgram;
	*slot->config = config;

	// Fast path: the queue had room
	if (programQueue.Push(slot)) {
		return;
	}

	// Slow path: retry until the push goes through
	Helpers::warn("AsyncCompilerThread: Queue full, spinning");
	while (!programQueue.Push(slot)) {
	}
}
// Blocks until the compiler thread has processed everything that was queued before this call.
//
// The worker clears `hasWork` at the end of each drain pass. A single clear is not enough to conclude that the
// queue is empty: the worker may have observed an empty queue *before* the caller's last push and only clear
// the flag afterwards. We therefore wait for two clears - the second one can only come from a drain pass that
// started after the first clear, i.e. after every push made before Finish() was entered.
//
// The flag is left set on return. This call never returns if the worker thread is not running.
void AsyncCompilerThread::Finish()
{
	hasWork.test_and_set();

	// First clear: may belong to a drain pass that finished before our last push became visible.
	// The test_and_set that observes the clear also re-arms the flag for the second wait.
	while (hasWork.test_and_set()) {
		std::this_thread::yield();
	}

	// Second clear: produced by a drain pass that began after the first clear, so the queue has been emptied
	while (hasWork.test_and_set()) {
		std::this_thread::yield();
	}
}

View file

@ -9,6 +9,7 @@
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/regs.hpp"
#include "renderer_gl/async_compiler.hpp"
#include "math_util.hpp"
CMRC_DECLARE(RendererGL);
@ -172,9 +173,18 @@ void RendererGL::initGraphicsContextInternal() {
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
}
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
// So we just call initGraphicsContextInternal for both
void RendererGL::initGraphicsContext([[maybe_unused]] SDL_Window* window) { initGraphicsContextInternal(); }
// Graphics context entry point. The parameter type is picked at build time: GL::Context* when
// PANDA3DS_FRONTEND_QT is defined, SDL_Window* when PANDA3DS_FRONTEND_SDL is defined.
// Note that no signature is emitted at all if neither macro is defined.
#ifdef PANDA3DS_FRONTEND_QT
void RendererGL::initGraphicsContext(GL::Context* context)
#elif defined(PANDA3DS_FRONTEND_SDL)
void RendererGL::initGraphicsContext(SDL_Window* context)
#endif
{
// Hybrid mode compiles shaders on a separate thread; the frontend handle is passed along as the
// opaque userdata from which the compiler thread creates its own GL context
if (shaderMode == ShaderMode::Hybrid) {
asyncCompiler = new AsyncCompilerThread(fragShaderGen, context);
}
// Common initialisation shared by all frontends
initGraphicsContextInternal();
}
// Set up the OpenGL blending context to match the emulated PICA
void RendererGL::setupBlending() {
@ -414,15 +424,46 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = shaderMode == ShaderMode::Ubershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
bool usingUbershader;
switch (shaderMode) {
case ShaderMode::Ubershader: {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
} else {
usingUbershader = true;
}
break;
}
case ShaderMode::Specialized: {
usingUbershader = false;
break;
}
case ShaderMode::Hybrid: {
PICA::FragmentConfig fsConfig(regs); // TODO: introduce code duplication to make sure this constructor/lookup isn't done too many times
auto cachedProgram = shaderCache.find(fsConfig);
if (cachedProgram == shaderCache.end()) {
CachedProgram& program = shaderCache[fsConfig];
program.compiling.store(true);
asyncCompiler->PushFragmentConfig(fsConfig, &program);
usingUbershader = true;
} else if (cachedProgram->second.compiling.load(std::memory_order_relaxed)) {
usingUbershader = true;
} else {
usingUbershader = false;
}
break;
}
default: {
Helpers::panic("Invalid shader mode");
break;
}
}
@ -844,14 +885,20 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
if (shaderMode == ShaderMode::Hybrid) {
Helpers::panic("Compiling shaders in main thread, this should never happen");
}
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({defaultShadergenVs, fragShader});
gl.useProgram(program);
fragShader.free();
}
if (programEntry.needsInitialization) {
gl.useProgram(program);
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
@ -862,6 +909,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
programEntry.needsInitialization = false;
}
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, shadergenFragmentUBO);
@ -979,6 +1027,11 @@ void RendererGL::screenshot(const std::string& name) {
}
void RendererGL::clearShaderCache() {
if (asyncCompiler && shaderMode == ShaderMode::Hybrid) {
// May contain objects that are still in use, so we need to clear them first
asyncCompiler->Finish();
}
for (auto& shader : shaderCache) {
CachedProgram& cachedProgram = shader.second;
cachedProgram.program.free();

View file

@ -35,6 +35,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 3 : 1);
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
if (window == nullptr) {
@ -342,3 +343,38 @@ void FrontendSDL::run() {
SDL_GL_SwapWindow(window);
}
}
namespace AsyncCompiler {
void* createContext(void* window) {
SDL_Window* sdlWindow = static_cast<SDL_Window*>(window);
// SDL_GL_CreateContext also makes it the current context so we need to switch back after creation
SDL_GLContext currentContext = SDL_GL_GetCurrentContext();
SDL_GLContext glContext = SDL_GL_CreateContext(sdlWindow);
if (glContext == nullptr) {
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
}
// As per the wiki you should check the value after creating the context
// as it can differ from the requested value
int sharingEnabled;
SDL_GL_GetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, &sharingEnabled);
if (!sharingEnabled) {
Helpers::panic("OpenGL context sharing not enabled");
}
SDL_GL_MakeCurrent(sdlWindow, currentContext);
return glContext;
}
void makeCurrent(void* window, void* context) {
SDL_GL_MakeCurrent((SDL_Window*)window, (SDL_GLContext)context);
}
void destroyContext(void* context) {
SDL_GL_DeleteContext(static_cast<SDL_GLContext>(context));
}
}