mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 14:45:41 +12:00
Actually implement the damn thing
This commit is contained in:
parent
c396b3f225
commit
67069a8826
7 changed files with 274 additions and 21 deletions
|
@ -79,6 +79,7 @@ include_directories(third_party/stb)
|
|||
include_directories(third_party/opengl)
|
||||
include_directories(third_party/miniaudio)
|
||||
include_directories(third_party/mio/single_include)
|
||||
include_directories(third_party/lockfree)
|
||||
|
||||
add_compile_definitions(NOMINMAX) # Make windows.h not define min/max macros because third-party deps don't like it
|
||||
add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything
|
||||
|
@ -325,14 +326,14 @@ if(ENABLE_OPENGL)
|
|||
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
|
||||
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
|
||||
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
|
||||
include/renderer_gl/gl_state.hpp
|
||||
include/renderer_gl/gl_state.hpp include/renderer_gl/async_compiler.hpp
|
||||
)
|
||||
|
||||
set(RENDERER_GL_SOURCE_FILES src/core/renderer_gl/renderer_gl.cpp
|
||||
src/core/renderer_gl/textures.cpp src/core/renderer_gl/etc1.cpp
|
||||
src/core/renderer_gl/gl_state.cpp src/host_shaders/opengl_display.frag
|
||||
src/host_shaders/opengl_display.vert src/host_shaders/opengl_vertex_shader.vert
|
||||
src/host_shaders/opengl_fragment_shader.frag
|
||||
src/core/renderer_gl/gl_state.cpp src/core/renderer_gl/async_compiler.cpp
|
||||
src/host_shaders/opengl_display.frag src/host_shaders/opengl_display.vert
|
||||
src/host_shaders/opengl_vertex_shader.vert src/host_shaders/opengl_fragment_shader.frag
|
||||
)
|
||||
|
||||
set(HEADER_FILES ${HEADER_FILES} ${RENDERER_GL_INCLUDE_FILES})
|
||||
|
|
|
@ -206,6 +206,24 @@ namespace PICA {
|
|||
return std::memcmp(this, &config, sizeof(FragmentConfig)) == 0;
|
||||
}
|
||||
|
||||
// Copy assignment operator.
// BitField's copy constructor is deleted, so the BitField-backed members are copied through their raw storage
// one by one instead of relying on a defaulted operator=. texConfig is copied with a plain assignment.
FragmentConfig& operator=(const FragmentConfig& config) {
	// If this fails you probably added a new field to the struct and forgot to update this copy assignment operator
	static_assert(
		sizeof(FragmentConfig) ==
		sizeof(outConfig.raw) + sizeof(texConfig) + sizeof(fogConfig.raw) + sizeof(lighting.raw) + 7 * sizeof(LightingLUTConfig) + 8 * sizeof(Light)
	);

	outConfig.raw = config.outConfig.raw;
	texConfig = config.texConfig;
	fogConfig.raw = config.fogConfig.raw;

	// Lighting: the top-level word first, then each of the 7 LUT configs and 8 lights
	lighting.raw = config.lighting.raw;
	for (int lut = 0; lut != 7; ++lut) {
		lighting.luts[lut].raw = config.lighting.luts[lut].raw;
	}
	for (int light = 0; light != 8; ++light) {
		lighting.lights[light].raw = config.lighting.lights[light].raw;
	}

	return *this;
}
|
||||
|
||||
FragmentConfig(const std::array<u32, 0x300>& regs) : lighting(regs) {
|
||||
auto alphaTestConfig = regs[InternalRegs::AlphaTestConfig];
|
||||
auto alphaTestFunction = Helpers::getBits<4, 3>(alphaTestConfig);
|
||||
|
|
54
include/renderer_gl/async_compiler.hpp
Normal file
54
include/renderer_gl/async_compiler.hpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include "opengl.hpp"
|
||||
#include "renderer_gl/renderer_gl.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "lockfree/spsc/queue.hpp"
|
||||
|
||||
// Forward declaration only: this header just holds a reference to the generator
namespace PICA::ShaderGen {
	class FragmentGenerator;
}
|
||||
|
||||
// Context hooks used by the async shader compiler. They are only declared here; the frontend supplies the
// definitions, which is why everything is passed around as opaque void pointers.
namespace AsyncCompiler {
	// Creates a context for the compiler thread and returns it as an opaque handle.
	// `userdata` is whatever frontend object AsyncCompilerThread was constructed with.
	void* createContext(void* userdata);

	// Makes `context` (a handle returned by createContext) current on the calling thread
	void makeCurrent(void* userdata, void* context);

	// Destroys a context previously returned by createContext
	void destroyContext(void* context);
}
|
||||
|
||||
struct CompilingProgram
|
||||
{
|
||||
CachedProgram* program;
|
||||
PICA::FragmentConfig* config;
|
||||
};
|
||||
|
||||
struct AsyncCompilerThread
|
||||
{
|
||||
explicit AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata);
|
||||
~AsyncCompilerThread();
|
||||
|
||||
// Called from the emulator thread to queue a fragment configuration for compilation
|
||||
// Returns false if the queue is full, true otherwise
|
||||
void PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram);
|
||||
|
||||
// Wait for all queued fragment configurations to be compiled
|
||||
void Finish();
|
||||
|
||||
private:
|
||||
PICA::ShaderGen::FragmentGenerator& fragShaderGen;
|
||||
OpenGL::Shader defaultShadergenVs;
|
||||
|
||||
// Our lockfree queue only allows for trivial types, so we preallocate enough structs
|
||||
// to avoid dynamic allocation on each push
|
||||
int preallocatedProgramsIndex;
|
||||
static constexpr int preallocatedProgramsSize = 256;
|
||||
std::array<CompilingProgram*, preallocatedProgramsSize> preallocatedPrograms;
|
||||
lockfree::spsc::Queue<CompilingProgram*, preallocatedProgramsSize - 1> programQueue;
|
||||
std::atomic_bool running;
|
||||
std::atomic_flag hasWork = ATOMIC_FLAG_INIT;
|
||||
std::thread thread;
|
||||
};
|
|
@ -23,6 +23,15 @@
|
|||
// More circular dependencies!
|
||||
class GPU;
|
||||
|
||||
// Cached recompiled fragment shader
|
||||
struct CachedProgram {
|
||||
OpenGL::Program program;
|
||||
std::atomic_bool compiling = false;
|
||||
bool needsInitialization = true;
|
||||
};
|
||||
|
||||
struct AsyncCompilerThread;
|
||||
|
||||
class RendererGL final : public Renderer {
|
||||
GLStateManager gl = {};
|
||||
|
||||
|
@ -72,12 +81,10 @@ class RendererGL final : public Renderer {
|
|||
OpenGL::Shader defaultShadergenVs;
|
||||
GLuint shadergenFragmentUBO;
|
||||
|
||||
// Cached recompiled fragment shader
|
||||
struct CachedProgram {
|
||||
OpenGL::Program program;
|
||||
};
|
||||
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;
|
||||
|
||||
AsyncCompilerThread* asyncCompiler = nullptr;
|
||||
|
||||
OpenGL::Framebuffer getColourFBO();
|
||||
OpenGL::Texture getTexture(Texture& tex);
|
||||
OpenGL::Program& getSpecializedShader();
|
||||
|
@ -101,7 +108,6 @@ class RendererGL final : public Renderer {
|
|||
|
||||
void reset() override;
|
||||
void display() override; // Display the 3DS screen contents to the window
|
||||
void initGraphicsContext(SDL_Window* window) override; // Initialize graphics context
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
|
@ -123,7 +129,9 @@ class RendererGL final : public Renderer {
|
|||
void initUbershader(OpenGL::Program& program);
|
||||
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
virtual void initGraphicsContext([[maybe_unused]] GL::Context* context) override { initGraphicsContextInternal(); }
|
||||
virtual void initGraphicsContext(GL::Context* context) override;
|
||||
#elif defined(PANDA3DS_FRONTEND_SDL)
|
||||
virtual void initGraphicsContext(SDL_Window* window) override;
|
||||
#endif
|
||||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
|
|
83
src/core/renderer_gl/async_compiler.cpp
Normal file
83
src/core/renderer_gl/async_compiler.cpp
Normal file
|
@ -0,0 +1,83 @@
|
|||
#include "renderer_gl/async_compiler.hpp"
|
||||
|
||||
AsyncCompilerThread::AsyncCompilerThread(PICA::ShaderGen::FragmentGenerator& fragShaderGen, void* userdata)
|
||||
: fragShaderGen(fragShaderGen)
|
||||
{
|
||||
preallocatedProgramsIndex = 0;
|
||||
running.store(true);
|
||||
|
||||
for (int i = 0; i < preallocatedProgramsSize; i++)
|
||||
{
|
||||
preallocatedPrograms[i] = new CompilingProgram();
|
||||
preallocatedPrograms[i]->config = new PICA::FragmentConfig({});
|
||||
}
|
||||
|
||||
// The context needs to be created on the main thread so that we can make it shared with that
|
||||
// thread's context
|
||||
void* context = AsyncCompiler::createContext(userdata);
|
||||
thread = std::thread([this, userdata, context]()
|
||||
{
|
||||
AsyncCompiler::makeCurrent(userdata, context);
|
||||
printf("Async compiler started, GL version: %s\n", glGetString(GL_VERSION));
|
||||
|
||||
std::string defaultShadergenVSSource = this->fragShaderGen.getDefaultVertexShader();
|
||||
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
|
||||
|
||||
while (running.load())
|
||||
{
|
||||
CompilingProgram* item;
|
||||
while (programQueue.Pop(item)) {
|
||||
OpenGL::Program& glProgram = item->program->program;
|
||||
std::string fs = this->fragShaderGen.generate(*item->config);
|
||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||
glProgram.create({defaultShadergenVs, fragShader});
|
||||
item->program->compiling.store(false);
|
||||
fragShader.free();
|
||||
}
|
||||
|
||||
hasWork.clear();
|
||||
std::this_thread::yield();
|
||||
}
|
||||
|
||||
AsyncCompiler::destroyContext(context);
|
||||
});
|
||||
}
|
||||
|
||||
AsyncCompilerThread::~AsyncCompilerThread()
|
||||
{
|
||||
running.store(false);
|
||||
thread.join();
|
||||
|
||||
for (int i = 0; i < preallocatedProgramsSize; i++)
|
||||
{
|
||||
delete preallocatedPrograms[i]->config;
|
||||
delete preallocatedPrograms[i];
|
||||
}
|
||||
}
|
||||
|
||||
void AsyncCompilerThread::PushFragmentConfig(const PICA::FragmentConfig& config, CachedProgram* cachedProgram)
|
||||
{
|
||||
CompilingProgram* newProgram = preallocatedPrograms[preallocatedProgramsIndex];
|
||||
newProgram->program = cachedProgram;
|
||||
*newProgram->config = config;
|
||||
preallocatedProgramsIndex = (preallocatedProgramsIndex + 1) % preallocatedProgramsSize;
|
||||
bool pushed = programQueue.Push(newProgram);
|
||||
|
||||
if (!pushed) {
|
||||
Helpers::warn("AsyncCompilerThread: Queue full, spinning");
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
||||
while (!pushed) {
|
||||
pushed = programQueue.Push(newProgram);
|
||||
}
|
||||
}
|
||||
|
||||
void AsyncCompilerThread::Finish()
|
||||
{
|
||||
hasWork.test_and_set();
|
||||
|
||||
// Wait for the compiler thread to finish any outstanding work
|
||||
while (hasWork.test_and_set()) {}
|
||||
}
|
|
@ -9,6 +9,7 @@
|
|||
#include "PICA/pica_frag_uniforms.hpp"
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "renderer_gl/async_compiler.hpp"
|
||||
#include "math_util.hpp"
|
||||
|
||||
CMRC_DECLARE(RendererGL);
|
||||
|
@ -172,9 +173,18 @@ void RendererGL::initGraphicsContextInternal() {
|
|||
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
|
||||
}
|
||||
|
||||
// The OpenGL renderer doesn't need to do anything with the GL context (For Qt frontend) or the SDL window (For SDL frontend)
|
||||
// So we just call initGraphicsContextInternal for both
|
||||
void RendererGL::initGraphicsContext([[maybe_unused]] SDL_Window* window) { initGraphicsContextInternal(); }
|
||||
#ifdef PANDA3DS_FRONTEND_QT
|
||||
void RendererGL::initGraphicsContext(GL::Context* context)
|
||||
#elif defined(PANDA3DS_FRONTEND_SDL)
|
||||
void RendererGL::initGraphicsContext(SDL_Window* context)
|
||||
#endif
|
||||
{
|
||||
if (shaderMode == ShaderMode::Hybrid) {
|
||||
asyncCompiler = new AsyncCompilerThread(fragShaderGen, context);
|
||||
}
|
||||
|
||||
initGraphicsContextInternal();
|
||||
}
|
||||
|
||||
// Set up the OpenGL blending context to match the emulated PICA
|
||||
void RendererGL::setupBlending() {
|
||||
|
@ -414,15 +424,46 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
OpenGL::Triangle,
|
||||
};
|
||||
|
||||
bool usingUbershader = shaderMode == ShaderMode::Ubershader;
|
||||
if (usingUbershader) {
|
||||
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
|
||||
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
|
||||
bool usingUbershader;
|
||||
switch (shaderMode) {
|
||||
case ShaderMode::Ubershader: {
|
||||
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
|
||||
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
|
||||
|
||||
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
|
||||
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
|
||||
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
|
||||
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using more than N lights via shadergen
|
||||
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
|
||||
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
|
||||
usingUbershader = false;
|
||||
} else {
|
||||
usingUbershader = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ShaderMode::Specialized: {
|
||||
usingUbershader = false;
|
||||
break;
|
||||
}
|
||||
|
||||
case ShaderMode::Hybrid: {
|
||||
PICA::FragmentConfig fsConfig(regs); // TODO: introduce code duplication to make sure this constructor/lookup isn't done too many times
|
||||
auto cachedProgram = shaderCache.find(fsConfig);
|
||||
if (cachedProgram == shaderCache.end()) {
|
||||
CachedProgram& program = shaderCache[fsConfig];
|
||||
program.compiling.store(true);
|
||||
asyncCompiler->PushFragmentConfig(fsConfig, &program);
|
||||
usingUbershader = true;
|
||||
} else if (cachedProgram->second.compiling.load(std::memory_order_relaxed)) {
|
||||
usingUbershader = true;
|
||||
} else {
|
||||
usingUbershader = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
Helpers::panic("Invalid shader mode");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -844,14 +885,20 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
|
|||
OpenGL::Program& program = programEntry.program;
|
||||
|
||||
if (!program.exists()) {
|
||||
if (shaderMode == ShaderMode::Hybrid) {
|
||||
Helpers::panic("Compiling shaders in main thread, this should never happen");
|
||||
}
|
||||
|
||||
std::string fs = fragShaderGen.generate(fsConfig);
|
||||
|
||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||
program.create({defaultShadergenVs, fragShader});
|
||||
gl.useProgram(program);
|
||||
|
||||
fragShader.free();
|
||||
}
|
||||
|
||||
if (programEntry.needsInitialization) {
|
||||
gl.useProgram(program);
|
||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
|
||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
||||
|
@ -862,6 +909,7 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
|
|||
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
|
||||
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
|
||||
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
|
||||
programEntry.needsInitialization = false;
|
||||
}
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, shadergenFragmentUBO);
|
||||
|
||||
|
@ -979,6 +1027,11 @@ void RendererGL::screenshot(const std::string& name) {
|
|||
}
|
||||
|
||||
void RendererGL::clearShaderCache() {
|
||||
if (asyncCompiler && shaderMode == ShaderMode::Hybrid) {
|
||||
// May contain objects that are still in use, so we need to clear them first
|
||||
asyncCompiler->Finish();
|
||||
}
|
||||
|
||||
for (auto& shader : shaderCache) {
|
||||
CachedProgram& cachedProgram = shader.second;
|
||||
cachedProgram.program.free();
|
||||
|
|
|
@ -35,6 +35,7 @@ FrontendSDL::FrontendSDL() : keyboardMappings(InputMappings::defaultKeyboardMapp
|
|||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, config.rendererType == RendererType::Software ? 3 : 4);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, config.rendererType == RendererType::Software ? 3 : 1);
|
||||
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
|
||||
window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 400, 480, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
|
||||
|
||||
if (window == nullptr) {
|
||||
|
@ -342,3 +343,38 @@ void FrontendSDL::run() {
|
|||
SDL_GL_SwapWindow(window);
|
||||
}
|
||||
}
|
||||
|
||||
namespace AsyncCompiler {
|
||||
void* createContext(void* window) {
|
||||
SDL_Window* sdlWindow = static_cast<SDL_Window*>(window);
|
||||
|
||||
// SDL_GL_CreateContext also makes it the current context so we need to switch back after creation
|
||||
SDL_GLContext currentContext = SDL_GL_GetCurrentContext();
|
||||
|
||||
SDL_GLContext glContext = SDL_GL_CreateContext(sdlWindow);
|
||||
|
||||
if (glContext == nullptr) {
|
||||
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
|
||||
}
|
||||
|
||||
// As per the wiki you should check the value after creating the context
|
||||
// as it can differ from the requested value
|
||||
int sharingEnabled;
|
||||
SDL_GL_GetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, &sharingEnabled);
|
||||
if (!sharingEnabled) {
|
||||
Helpers::panic("OpenGL context sharing not enabled");
|
||||
}
|
||||
|
||||
SDL_GL_MakeCurrent(sdlWindow, currentContext);
|
||||
|
||||
return glContext;
|
||||
}
|
||||
|
||||
void makeCurrent(void* window, void* context) {
|
||||
SDL_GL_MakeCurrent((SDL_Window*)window, (SDL_GLContext)context);
|
||||
}
|
||||
|
||||
void destroyContext(void* context) {
|
||||
SDL_GL_DeleteContext(static_cast<SDL_GLContext>(context));
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue