Merge pull request #70 from wheremyfoodat/gl_state

More efficient OpenGL state management
This commit is contained in:
wheremyfoodat 2023-07-06 00:51:06 +03:00 committed by GitHub
commit f630d519ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 355 additions and 179 deletions

View file

@ -83,8 +83,8 @@ else()
message(FATAL_ERROR "Currently unsupported CPU architecture") message(FATAL_ERROR "Currently unsupported CPU architecture")
endif() endif()
set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/core/CPU/cpu_dynarmic.cpp src/core/CPU/dynarmic_cycles.cpp set(SOURCE_FILES src/main.cpp src/emulator.cpp src/io_file.cpp src/gl_state.cpp src/core/CPU/cpu_dynarmic.cpp
src/core/memory.cpp src/core/CPU/dynarmic_cycles.cpp src/core/memory.cpp
) )
set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp) set(CRYPTO_SOURCE_FILES src/core/crypto/aes_engine.cpp)
set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp set(KERNEL_SOURCE_FILES src/core/kernel/kernel.cpp src/core/kernel/resource_limits.cpp
@ -138,7 +138,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/opengl.hpp inc
include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp include/PICA/dynapica/shader_rec_emitter_x64.hpp include/PICA/pica_hash.hpp include/result/result.hpp
include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp include/result/result_common.hpp include/result/result_fs.hpp include/result/result_fnd.hpp
include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp include/result/result_gsp.hpp include/result/result_kernel.hpp include/result/result_os.hpp
include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/crypto/aes_engine.hpp include/metaprogramming.hpp include/PICA/pica_vertex.hpp include/gl_state.hpp
) )
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp

View file

@ -28,7 +28,7 @@ class GPU {
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
std::array<PicaVertex, 3> immediateModeVertices; std::array<PICA::Vertex, 3> immediateModeVertices;
uint immediateModeVertIndex; uint immediateModeVertIndex;
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
@ -68,7 +68,7 @@ class GPU {
u32* cmdBuffCurr = nullptr; u32* cmdBuffCurr = nullptr;
Renderer renderer; Renderer renderer;
PicaVertex getImmediateModeVertex(); PICA::Vertex getImmediateModeVertex();
public: public:
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
@ -81,7 +81,7 @@ class GPU {
// Set to false by the renderer when the lighting_lut is uploaded to the GPU // Set to false by the renderer when the lighting_lut is uploaded to the GPU
bool lightingLUTDirty = false; bool lightingLUTDirty = false;
GPU(Memory& mem); GPU(Memory& mem, GLStateManager& gl);
void initGraphicsContext() { renderer.initGraphicsContext(); } void initGraphicsContext() { renderer.initGraphicsContext(); }
void getGraphicsContext() { renderer.getGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); }
void display() { renderer.display(); } void display() { renderer.display(); }

View file

@ -2,35 +2,37 @@
#include "PICA/float_types.hpp" #include "PICA/float_types.hpp"
#include <array> #include <array>
// A representation of the output vertex as it comes out of the vertex shader, with padding and all namespace PICA {
struct PicaVertex { // A representation of the output vertex as it comes out of the vertex shader, with padding and all
using vec2f = std::array<Floats::f24, 2>; struct Vertex {
using vec3f = std::array<Floats::f24, 3>; using vec2f = std::array<Floats::f24, 2>;
using vec4f = std::array<Floats::f24, 4>; using vec3f = std::array<Floats::f24, 3>;
using vec4f = std::array<Floats::f24, 4>;
union { union {
struct { struct {
vec4f positions; // Vertex position vec4f positions; // Vertex position
vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting)
vec4f colour; // Vertex color vec4f colour; // Vertex color
vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!)
vec2f texcoord1; // Texcoords for TU 1 vec2f texcoord1; // Texcoords for TU 1
Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture Floats::f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture
u32 padding; // Unused u32 padding; // Unused
vec3f view; // View vector (for fragment lighting) vec3f view; // View vector (for fragment lighting)
u32 padding2; // Unused u32 padding2; // Unused
vec2f texcoord2; // Texcoords for TU 2 vec2f texcoord2; // Texcoords for TU 2
} s; } s;
// The software, non-accelerated vertex loader writes here and then reads specific components from the above struct // The software, non-accelerated vertex loader writes here and then reads specific components from the above struct
Floats::f24 raw[0x20]; Floats::f24 raw[0x20];
};
Vertex() {}
}; };
PicaVertex() {} } // namespace PICA
};
// Float is used here instead of Floats::f24 to ensure that Floats::f24 is properly sized for direct interpretations as a float by the render backend // Float is used here instead of Floats::f24 to ensure that Floats::f24 is properly sized for direct interpretations as a float by the render backend
#define ASSERT_POS(member, pos) static_assert(offsetof(PicaVertex, s.member) == pos * sizeof(float), "PicaVertex struct is broken!"); #define ASSERT_POS(member, pos) static_assert(offsetof(PICA::Vertex, s.member) == pos * sizeof(float), "PICA::Vertex struct is broken!");
ASSERT_POS(positions, 0) ASSERT_POS(positions, 0)
ASSERT_POS(quaternion, 4) ASSERT_POS(quaternion, 4)

View file

@ -11,7 +11,7 @@
#include "crypto/aes_engine.hpp" #include "crypto/aes_engine.hpp"
#include "io_file.hpp" #include "io_file.hpp"
#include "memory.hpp" #include "memory.hpp"
#include "opengl.hpp" #include "gl_state.hpp"
enum class ROMType { None, ELF, NCSD }; enum class ROMType { None, ELF, NCSD };
@ -22,6 +22,7 @@ class Emulator {
Kernel kernel; Kernel kernel;
Crypto::AESEngine aesEngine; Crypto::AESEngine aesEngine;
GLStateManager gl;
SDL_Window* window; SDL_Window* window;
SDL_GLContext glContext; SDL_GLContext glContext;
SDL_GameController* gameController; SDL_GameController* gameController;
@ -56,5 +57,5 @@ class Emulator {
bool loadNCSD(const std::filesystem::path& path); bool loadNCSD(const std::filesystem::path& path);
bool loadELF(const std::filesystem::path& path); bool loadELF(const std::filesystem::path& path);
bool loadELF(std::ifstream& file); bool loadELF(std::ifstream& file);
void initGraphicsContext() { gpu.initGraphicsContext(); } void initGraphicsContext();
}; };

140
include/gl_state.hpp Normal file
View file

@ -0,0 +1,140 @@
#pragma once
#include <type_traits>
#include "opengl.hpp"
// GL state manager object for use in the OpenGL GPU renderer and potentially other things in the future (such as a potential ImGui GUI)
// This object is meant to help us avoid duplicate OpenGL calls (such as binding the same program twice, enabling/disabling a setting twice, etc)
// by checking if we actually *need* a state change. This is meant to avoid expensive driver calls and minimize unneeded state changes
// A lot of code is in the header file instead of the relevant source file to make sure stuff gets inlined even without LTO, and
// because this header should ideally not be getting included in too many places
// Code that does not need inlining however, like the reset() function should be in gl_state.cpp
// This state manager may not handle every aspect of OpenGL, in which case anything not handled here should just be manipulated with raw
// OpenGL/opengl.hpp calls. However, anything that can be handled through the state manager should be, or at least there should be an attempt to keep it
// consistent with the current GL state to avoid bugs/suboptimal code.
// The state manager must *also* be a trivially constructible/destructible type, to ensure that no OpenGL functions get called sneakily without us
// knowing. This is important for when we want to eg add a Vulkan or misc backend. Would definitely not want to refactor all this. So we try to be as
// backend-agnostic as possible
// Shadow copy of a subset of the OpenGL pipeline state.
// Every setter compares the requested value against the cached one and only issues the underlying
// OpenGL call when the value actually changes, then records the new value in the cache.
//
// The struct deliberately has no constructors and no default member initializers: the static_asserts
// below require it to be trivially constructible/destructible. As a consequence, the cached fields hold
// indeterminate values until they are assigned.
// NOTE(review): presumably reset() (defined out of line) is what establishes the initial state - confirm
// that it is called before any of the setters below are used.
struct GLStateManager {
	// Cached enable/disable toggles
	bool blendEnabled;
	bool depthEnabled;
	bool scissorEnabled;

	// Colour/depth masks
	bool redMask, greenMask, blueMask, alphaMask;
	bool depthMask;

	// Cached object bindings (raw GL handles)
	GLuint boundVAO;
	GLuint boundVBO;
	GLuint currentProgram;

	// Cached depth comparison function
	GLenum depthFunc;

	// Reset functions. Declared here and defined out of line, since they do not need inlining
	void reset();
	void resetBlend();
	void resetColourMask();
	void resetDepth();
	void resetVAO();
	void resetVBO();
	void resetProgram();
	void resetScissor();

	// Enables depth via OpenGL::enableDepth, unless the cache says it is already enabled
	void enableDepth() {
		if (!depthEnabled) {
			depthEnabled = true;
			OpenGL::enableDepth();
		}
	}

	// Disables depth via OpenGL::disableDepth, unless the cache says it is already disabled
	void disableDepth() {
		if (depthEnabled) {
			depthEnabled = false;
			OpenGL::disableDepth();
		}
	}

	// Enables blending via OpenGL::enableBlend, unless the cache says it is already enabled
	void enableBlend() {
		if (!blendEnabled) {
			blendEnabled = true;
			OpenGL::enableBlend();
		}
	}

	// Disables blending via OpenGL::disableBlend, unless the cache says it is already disabled
	void disableBlend() {
		if (blendEnabled) {
			blendEnabled = false;
			OpenGL::disableBlend();
		}
	}

	// Enables the scissor test via OpenGL::enableScissor, unless the cache says it is already enabled
	void enableScissor() {
		if (!scissorEnabled) {
			scissorEnabled = true;
			OpenGL::enableScissor();
		}
	}

	// Disables the scissor test via OpenGL::disableScissor, unless the cache says it is already disabled
	void disableScissor() {
		if (scissorEnabled) {
			scissorEnabled = false;
			OpenGL::disableScissor();
		}
	}

	// Binds the vertex array with the given handle if it differs from the cached binding
	void bindVAO(GLuint handle) {
		if (boundVAO != handle) {
			boundVAO = handle;
			glBindVertexArray(handle);
		}
	}

	// Binds the given buffer to GL_ARRAY_BUFFER if it differs from the cached binding
	void bindVBO(GLuint handle) {
		if (boundVBO != handle) {
			boundVBO = handle;
			glBindBuffer(GL_ARRAY_BUFFER, handle);
		}
	}

	// Makes the given program current if it differs from the cached program
	void useProgram(GLuint handle) {
		if (currentProgram != handle) {
			currentProgram = handle;
			glUseProgram(handle);
		}
	}

	// Convenience overloads that forward the wrapper object's raw handle to the functions above
	void bindVAO(const OpenGL::VertexArray& vao) { bindVAO(vao.handle()); }
	void bindVBO(const OpenGL::VertexBuffer& vbo) { bindVBO(vbo.handle()); }
	void useProgram(const OpenGL::Program& program) { useProgram(program.handle()); }

	// Sets the per-channel colour write mask if any channel differs from the cached mask.
	// r, g, b, a: requested write-enable flag for the red, green, blue and alpha channels
	void setColourMask(bool r, bool g, bool b, bool a) {
		if (r != redMask || g != greenMask || b != blueMask || a != alphaMask) {
			// Store the *requested* values in the cache before forwarding them.
			// Assigning in the opposite direction would overwrite the parameters with the stale cache,
			// so the GL call would re-apply the old mask and the cache would never be updated.
			redMask = r;
			greenMask = g;
			blueMask = b;
			alphaMask = a;

			OpenGL::setColourMask(r, g, b, a);
		}
	}

	// Sets the depth write mask via OpenGL::setDepthMask if it differs from the cached value
	void setDepthMask(bool mask) {
		if (depthMask != mask) {
			depthMask = mask;
			OpenGL::setDepthMask(mask);
		}
	}

	// Sets the depth comparison function via glDepthFunc if it differs from the cached value
	void setDepthFunc(GLenum func) {
		if (depthFunc != func) {
			depthFunc = func;
			glDepthFunc(func);
		}
	}

	// Convenience overload taking the typed enum; forwards its underlying GLenum value
	void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
};

static_assert(std::is_trivially_constructible<GLStateManager>(), "OpenGL State Manager class is not trivially constructible!");
static_assert(std::is_trivially_destructible<GLStateManager>(), "OpenGL State Manager class is not trivially destructible!");

View file

@ -128,9 +128,9 @@ namespace OpenGL {
#ifdef OPENGL_DESTRUCTORS #ifdef OPENGL_DESTRUCTORS
~VertexArray() { free(); } ~VertexArray() { free(); }
#endif #endif
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void bind() { glBindVertexArray(m_handle); } void bind() const { glBindVertexArray(m_handle); }
template <typename T> template <typename T>
void setAttributeFloat(GLuint index, GLint size, GLsizei stride, const void* offset, bool normalized = GL_FALSE) { void setAttributeFloat(GLuint index, GLint size, GLsizei stride, const void* offset, bool normalized = GL_FALSE) {
@ -299,11 +299,11 @@ namespace OpenGL {
#ifdef OPENGL_DESTRUCTORS #ifdef OPENGL_DESTRUCTORS
~Texture() { free(); } ~Texture() { free(); }
#endif #endif
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void bind() { glBindTexture(m_binding, m_handle); } void bind() const { glBindTexture(m_binding, m_handle); }
int width() { return m_width; } int width() const { return m_width; }
int height() { return m_height; } int height() const { return m_height; }
void free() { glDeleteTextures(1, &m_handle); } void free() { glDeleteTextures(1, &m_handle); }
}; };
@ -327,10 +327,10 @@ namespace OpenGL {
#ifdef OPENGL_DESTRUCTORS #ifdef OPENGL_DESTRUCTORS
~Framebuffer() { free(); } ~Framebuffer() { free(); }
#endif #endif
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void bind(GLenum target) { glBindFramebuffer(target, m_handle); } void bind(GLenum target) const { glBindFramebuffer(target, m_handle); }
void bind(FramebufferTypes target) { bind(static_cast<GLenum>(target)); } void bind(FramebufferTypes target) const { bind(static_cast<GLenum>(target)); }
void free() { glDeleteFramebuffers(1, &m_handle); } void free() { glDeleteFramebuffers(1, &m_handle); }
void createWithTexture(Texture& tex, GLenum mode = GL_FRAMEBUFFER, GLenum textureType = GL_TEXTURE_2D) { void createWithTexture(Texture& tex, GLenum mode = GL_FRAMEBUFFER, GLenum textureType = GL_TEXTURE_2D) {
@ -392,8 +392,8 @@ namespace OpenGL {
return m_handle != 0; return m_handle != 0;
} }
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
}; };
struct Program { struct Program {
@ -421,9 +421,9 @@ namespace OpenGL {
return m_handle != 0; return m_handle != 0;
} }
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void use() { glUseProgram(m_handle); } void use() const { glUseProgram(m_handle); }
}; };
static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) { static void dispatchCompute(GLuint groupsX = 1, GLuint groupsY = 1, GLuint groupsZ = 1) {
@ -454,9 +454,9 @@ namespace OpenGL {
#ifdef OPENGL_DESTRUCTORS #ifdef OPENGL_DESTRUCTORS
~VertexBuffer() { free(); } ~VertexBuffer() { free(); }
#endif #endif
GLuint handle() { return m_handle; } GLuint handle() const { return m_handle; }
bool exists() { return m_handle != 0; } bool exists() const { return m_handle != 0; }
void bind() { glBindBuffer(GL_ARRAY_BUFFER, m_handle); } void bind() const { glBindBuffer(GL_ARRAY_BUFFER, m_handle); }
void free() { glDeleteBuffers(1, &m_handle); } void free() { glDeleteBuffers(1, &m_handle); }
// Reallocates the buffer on every call. Prefer the sub version if possible. // Reallocates the buffer on every call. Prefer the sub version if possible.
@ -528,6 +528,8 @@ namespace OpenGL {
static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); } static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); }
static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast<GLenum>(func)); } static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast<GLenum>(func)); }
static void setColourMask(GLboolean r, GLboolean g, GLboolean b, GLboolean a) { glColorMask(r, g, b, a); }
static void setDepthMask(GLboolean mask) { glDepthMask(mask); }
enum Primitives { enum Primitives {
Triangle = GL_TRIANGLES, Triangle = GL_TRIANGLES,
@ -667,23 +669,23 @@ namespace OpenGL {
// We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place // We're never supporting 3D rectangles, because rectangles were never meant to be 3D in the first place
// x, y: Coords of the top left vertex // x, y: Coords of the top left vertex
// width, height: Dimensions of the rectangle. Initialized to 0 if not specified. // width, height: Dimensions of the rectangle. Initialized to 0 if not specified.
template <typename T> template <typename T>
struct Rectangle { struct Rectangle {
T x, y, width, height; T x, y, width, height;
std::pair<T, T> topLeft() { return std::make_pair(x, y); } std::pair<T, T> topLeft() const { return std::make_pair(x, y); }
std::pair<T, T> topRight() { return std::make_pair(x + width, y); } std::pair<T, T> topRight() const { return std::make_pair(x + width, y); }
std::pair<T, T> bottomLeft() { return std::make_pair(x, y + height); } std::pair<T, T> bottomLeft() const { return std::make_pair(x, y + height); }
std::pair<T, T> bottomRight() { return std::make_pair(x + width, y + height); } std::pair<T, T> bottomRight() const { return std::make_pair(x + width, y + height); }
Rectangle() : x(0), y(0), width(0), height(0) {} Rectangle() : x(0), y(0), width(0), height(0) {}
Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {} Rectangle(T x, T y, T width, T height) : x(x), y(y), width(width), height(height) {}
bool isEmpty() { return width == 0 && height == 0; } bool isEmpty() const { return width == 0 && height == 0; }
bool isLine() { return (width == 0 && height != 0) || (width != 0 && height == 0); } bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); }
void setEmpty() { x = y = width = height = 0; } void setEmpty() { x = y = width = height = 0; }
}; };
using Rect = Rectangle<GLuint>; using Rect = Rectangle<GLuint>;

View file

@ -3,9 +3,9 @@
#include <span> #include <span>
#include "PICA/float_types.hpp" #include "PICA/float_types.hpp"
#include "gl_state.hpp"
#include "helpers.hpp" #include "helpers.hpp"
#include "logger.hpp" #include "logger.hpp"
#include "opengl.hpp"
#include "surface_cache.hpp" #include "surface_cache.hpp"
#include "textures.hpp" #include "textures.hpp"
#include "PICA/regs.hpp" #include "PICA/regs.hpp"
@ -16,13 +16,13 @@ class GPU;
class Renderer { class Renderer {
GPU& gpu; GPU& gpu;
GLStateManager& gl;
OpenGL::Program triangleProgram; OpenGL::Program triangleProgram;
OpenGL::Program displayProgram; OpenGL::Program displayProgram;
OpenGL::VertexArray vao; OpenGL::VertexArray vao;
OpenGL::VertexBuffer vbo; OpenGL::VertexBuffer vbo;
GLint alphaControlLoc = -1;
GLint texUnitConfigLoc = -1;
// TEV configuration uniform locations // TEV configuration uniform locations
GLint textureEnvSourceLoc = -1; GLint textureEnvSourceLoc = -1;
@ -30,8 +30,8 @@ class Renderer {
GLint textureEnvCombinerLoc = -1; GLint textureEnvCombinerLoc = -1;
GLint textureEnvColorLoc = -1; GLint textureEnvColorLoc = -1;
GLint textureEnvScaleLoc = -1; GLint textureEnvScaleLoc = -1;
GLint textureEnvUpdateBufferLoc = -1;
GLint textureEnvBufferColorLoc = -1; // Uniform of PICA registers
GLint picaRegLoc = -1; GLint picaRegLoc = -1;
// Depth configuration uniform locations // Depth configuration uniform locations
@ -39,9 +39,6 @@ class Renderer {
GLint depthScaleLoc = -1; GLint depthScaleLoc = -1;
GLint depthmapEnableLoc = -1; GLint depthmapEnableLoc = -1;
u32 oldAlphaControl = 0;
u32 oldTexUnitConfig = 0;
float oldDepthScale = -1.0; float oldDepthScale = -1.0;
float oldDepthOffset = 0.0; float oldDepthOffset = 0.0;
bool oldDepthmapEnable = false; bool oldDepthmapEnable = false;
@ -81,7 +78,7 @@ class Renderer {
void updateLightingLUT(); void updateLightingLUT();
public: public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {} Renderer(GPU& gpu, GLStateManager& gl, const std::array<u32, regNum>& internalRegs) : gpu(gpu), gl(gl), regs(internalRegs) {}
void reset(); void reset();
void display(); // Display the 3DS screen contents to the window void display(); // Display the 3DS screen contents to the window
@ -89,7 +86,7 @@ class Renderer {
void getGraphicsContext(); // Set up graphics context for rendering void getGraphicsContext(); // Set up graphics context for rendering
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control); // Clear a GPU buffer in VRAM
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags); // Perform display transfer
void drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices); // Draw the given vertices void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices); // Draw the given vertices
void setFBSize(u32 width, u32 height) { void setFBSize(u32 width, u32 height) {
fbSize.x() = width; fbSize.x() = width;

View file

@ -10,8 +10,9 @@
using namespace Floats; using namespace Floats;
// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it
GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) { // Thus, our GLStateManager being here does not negatively impact renderer-agnosticness
GPU::GPU(Memory& mem, GLStateManager& gl) : mem(mem), renderer(*this, gl, regs) {
vram = new u8[vramSize]; vram = new u8[vramSize];
mem.setVRAM(vram); // Give the bus a pointer to our VRAM mem.setVRAM(vram); // Give the bus a pointer to our VRAM
} }
@ -22,6 +23,7 @@ void GPU::reset() {
shaderJIT.reset(); shaderJIT.reset();
std::memset(vram, 0, vramSize); std::memset(vram, 0, vramSize);
lightingLUT.fill(0); lightingLUT.fill(0);
lightingLUTDirty = true;
totalAttribCount = 0; totalAttribCount = 0;
fixedAttribMask = 0; fixedAttribMask = 0;
@ -60,7 +62,7 @@ void GPU::drawArrays(bool indexed) {
} }
} }
static std::array<PicaVertex, Renderer::vertexBufferSize> vertices; static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
template <bool indexed, bool useShaderJIT> template <bool indexed, bool useShaderJIT>
void GPU::drawArrays() { void GPU::drawArrays() {
@ -248,7 +250,7 @@ void GPU::drawArrays() {
shaderUnit.vs.run(); shaderUnit.vs.run();
} }
PicaVertex& out = vertices[i]; PICA::Vertex& out = vertices[i];
// Map shader outputs to fixed function properties // Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7; const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) { for (int i = 0; i < totalShaderOutputs; i++) {
@ -264,8 +266,8 @@ void GPU::drawArrays() {
renderer.drawVertices(primType, std::span(vertices).first(vertexCount)); renderer.drawVertices(primType, std::span(vertices).first(vertexCount));
} }
PicaVertex GPU::getImmediateModeVertex() { PICA::Vertex GPU::getImmediateModeVertex() {
PicaVertex v; PICA::Vertex v;
const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1; const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
// Copy immediate mode attributes to vertex shader unit // Copy immediate mode attributes to vertex shader unit

View file

@ -188,7 +188,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
immediateModeAttributes[immediateModeAttrIndex++] = attr; immediateModeAttributes[immediateModeAttrIndex++] = attr;
if (immediateModeAttrIndex == totalAttrCount) { if (immediateModeAttrIndex == totalAttrCount) {
PicaVertex v = getImmediateModeVertex(); PICA::Vertex v = getImmediateModeVertex();
immediateModeAttrIndex = 0; immediateModeAttrIndex = 0;
immediateModeVertices[immediateModeVertIndex++] = v; immediateModeVertices[immediateModeVertIndex++] = v;

View file

@ -34,12 +34,11 @@ const char* vertexShader = R"(
// TEV uniforms // TEV uniforms
uniform uint u_textureEnvColor[6]; uniform uint u_textureEnvColor[6];
uniform uint u_textureEnvBufferColor; uniform uint u_picaRegs[0x200 - 0x48];
uniform uint u_picaRegs[0x200 - 0x47];
// Helper so that the implementation of u_pica_regs can be changed later // Helper so that the implementation of u_pica_regs can be changed later
uint readPicaReg(uint reg_addr){ uint readPicaReg(uint reg_addr){
return u_picaRegs[reg_addr - 0x47]; return u_picaRegs[reg_addr - 0x48];
} }
vec4 abgr8888ToVec4(uint abgr) { vec4 abgr8888ToVec4(uint abgr) {
@ -96,7 +95,7 @@ const char* vertexShader = R"(
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]); v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
} }
v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor); v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD));
// Parse clipping plane registers // Parse clipping plane registers
// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 // The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0
@ -131,15 +130,11 @@ const char* fragmentShader = R"(
out vec4 fragColour; out vec4 fragColour;
uniform uint u_alphaControl;
uniform uint u_textureConfig;
// TEV uniforms // TEV uniforms
uniform uint u_textureEnvSource[6]; uniform uint u_textureEnvSource[6];
uniform uint u_textureEnvOperand[6]; uniform uint u_textureEnvOperand[6];
uniform uint u_textureEnvCombiner[6]; uniform uint u_textureEnvCombiner[6];
uniform uint u_textureEnvScale[6]; uniform uint u_textureEnvScale[6];
uniform uint u_textureEnvUpdateBuffer;
// Depth control uniforms // Depth control uniforms
uniform float u_depthScale; uniform float u_depthScale;
@ -151,11 +146,11 @@ const char* fragmentShader = R"(
uniform sampler2D u_tex2; uniform sampler2D u_tex2;
uniform sampler1DArray u_tex_lighting_lut; uniform sampler1DArray u_tex_lighting_lut;
uniform uint u_picaRegs[0x200 - 0x47]; uniform uint u_picaRegs[0x200 - 0x48];
// Helper so that the implementation of u_pica_regs can be changed later // Helper so that the implementation of u_pica_regs can be changed later
uint readPicaReg(uint reg_addr){ uint readPicaReg(uint reg_addr){
return u_picaRegs[reg_addr - 0x47]; return u_picaRegs[reg_addr - 0x48];
} }
vec4 tevSources[16]; vec4 tevSources[16];
@ -340,7 +335,7 @@ const char* fragmentShader = R"(
bool error_unimpl = false; bool error_unimpl = false;
for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){ for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3); uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3);
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id); uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id);
@ -363,8 +358,8 @@ const char* fragmentShader = R"(
vec3 half_vector = normalize(normalize(light_vector) + view); vec3 half_vector = normalize(normalize(light_vector) + view);
for(int c = 0; c < 7; c++){ for (int c = 0; c < 7; c++) {
if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){ if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3); uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
float scale = float(1u << scale_id); float scale = float(1u << scale_id);
if (scale_id >= 6u) if (scale_id >= 6u)
@ -404,15 +399,15 @@ const char* fragmentShader = R"(
d[D1_LUT] = 0.0; d[D1_LUT] = 0.0;
d[FR_LUT] = 0.0; d[FR_LUT] = 0.0;
d[RG_LUT]= d[RB_LUT] = d[RR_LUT]; d[RG_LUT]= d[RB_LUT] = d[RR_LUT];
} else if(lookup_config == 1) { } else if (lookup_config == 1) {
d[D0_LUT] = 0.0; d[D0_LUT] = 0.0;
d[D1_LUT] = 0.0; d[D1_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if(lookup_config == 2) { } else if (lookup_config == 2) {
d[FR_LUT] = 0.0; d[FR_LUT] = 0.0;
d[SP_LUT] = 0.0; d[SP_LUT] = 0.0;
d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
} else if(lookup_config == 3) { } else if (lookup_config == 3) {
d[SP_LUT] = 0.0; d[SP_LUT] = 0.0;
d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0; d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0;
} else if (lookup_config == 4) { } else if (lookup_config == 4) {
@ -453,20 +448,22 @@ const char* fragmentShader = R"(
} }
void main() { void main() {
vec2 tex2UV = (u_textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2;
// TODO: what do invalid sources and disabled textures read as? // TODO: what do invalid sources and disabled textures read as?
// And what does the "previous combiner" source read initially? // And what does the "previous combiner" source read initially?
tevSources[0] = v_colour; // Primary/vertex color tevSources[0] = v_colour; // Primary/vertex color
calcLighting(tevSources[1],tevSources[2]); calcLighting(tevSources[1],tevSources[2]);
if ((u_textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy); uint textureConfig = readPicaReg(0x80);
if ((u_textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1); vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2;
if ((u_textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV);
if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy);
if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1);
if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV);
tevSources[13] = vec4(0.0); // Previous buffer tevSources[13] = vec4(0.0); // Previous buffer
tevSources[15] = vec4(0.0); // Previous combiner tevSources[15] = vec4(0.0); // Previous combiner
tevNextPreviousBuffer = v_textureEnvBufferColor; tevNextPreviousBuffer = v_textureEnvBufferColor;
uint textureEnvUpdateBuffer = readPicaReg(0xE0);
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
tevSources[14] = v_textureEnvColor[i]; // Constant color tevSources[14] = v_textureEnvColor[i]; // Constant color
@ -474,11 +471,11 @@ const char* fragmentShader = R"(
tevSources[13] = tevNextPreviousBuffer; tevSources[13] = tevNextPreviousBuffer;
if (i < 4) { if (i < 4) {
if ((u_textureEnvUpdateBuffer & (0x100u << i)) != 0u) { if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) {
tevNextPreviousBuffer.rgb = tevSources[15].rgb; tevNextPreviousBuffer.rgb = tevSources[15].rgb;
} }
if ((u_textureEnvUpdateBuffer & (0x1000u << i)) != 0u) { if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) {
tevNextPreviousBuffer.a = tevSources[15].a; tevNextPreviousBuffer.a = tevSources[15].a;
} }
} }
@ -503,9 +500,11 @@ const char* fragmentShader = R"(
// Write final fragment depth // Write final fragment depth
gl_FragDepth = depth; gl_FragDepth = depth;
if ((u_alphaControl & 1u) != 0u) { // Check if alpha test is on // Perform alpha test
uint func = (u_alphaControl >> 4u) & 7u; uint alphaControl = readPicaReg(0x104);
float reference = float((u_alphaControl >> 8u) & 0xffu) / 255.0; if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
uint func = (alphaControl >> 4u) & 7u;
float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
float alpha = fragColour.a; float alpha = fragColour.a;
switch (func) { switch (func) {
@ -592,21 +591,17 @@ void Renderer::reset() {
if (triangleProgram.exists()) { if (triangleProgram.exists()) {
const auto oldProgram = OpenGL::getProgram(); const auto oldProgram = OpenGL::getProgram();
triangleProgram.use(); gl.useProgram(triangleProgram);
oldAlphaControl = 0; // Default alpha control to 0
oldTexUnitConfig = 0; // Default tex unit config to 0
oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use
oldDepthOffset = 0.0; // Default depth offset to 0 oldDepthOffset = 0.0; // Default depth offset to 0
oldDepthmapEnable = false; // Enable w buffering oldDepthmapEnable = false; // Enable w buffering
glUniform1ui(alphaControlLoc, oldAlphaControl);
glUniform1ui(texUnitConfigLoc, oldTexUnitConfig);
glUniform1f(depthScaleLoc, oldDepthScale); glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset); glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable); glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
glUseProgram(oldProgram); // Switch to old GL program gl.useProgram(oldProgram); // Switch to old GL program
} }
} }
@ -614,18 +609,13 @@ void Renderer::initGraphicsContext() {
OpenGL::Shader vert(vertexShader, OpenGL::Vertex); OpenGL::Shader vert(vertexShader, OpenGL::Vertex);
OpenGL::Shader frag(fragmentShader, OpenGL::Fragment); OpenGL::Shader frag(fragmentShader, OpenGL::Fragment);
triangleProgram.create({ vert, frag }); triangleProgram.create({ vert, frag });
triangleProgram.use(); gl.useProgram(triangleProgram);
alphaControlLoc = OpenGL::uniformLocation(triangleProgram, "u_alphaControl");
texUnitConfigLoc = OpenGL::uniformLocation(triangleProgram, "u_textureConfig");
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource"); textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand"); textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner"); textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor"); textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale"); textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
textureEnvUpdateBufferLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvUpdateBuffer");
textureEnvBufferColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvBufferColor");
depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale"); depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset"); depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
@ -642,37 +632,37 @@ void Renderer::initGraphicsContext() {
OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
displayProgram.create({ vertDisplay, fragDisplay }); displayProgram.create({ vertDisplay, fragDisplay });
displayProgram.use(); gl.useProgram(displayProgram);
glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object
vbo.createFixedSize(sizeof(PicaVertex) * vertexBufferSize, GL_STREAM_DRAW); vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW);
vbo.bind(); gl.bindVBO(vbo);
vao.create(); vao.create();
vao.bind(); gl.bindVAO(vao);
// Position (x, y, z, w) attributes // Position (x, y, z, w) attributes
vao.setAttributeFloat<float>(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions)); vao.setAttributeFloat<float>(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions));
vao.enableAttribute(0); vao.enableAttribute(0);
// Quaternion attribute // Quaternion attribute
vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion)); vao.setAttributeFloat<float>(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion));
vao.enableAttribute(1); vao.enableAttribute(1);
// Colour attribute // Colour attribute
vao.setAttributeFloat<float>(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour)); vao.setAttributeFloat<float>(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour));
vao.enableAttribute(2); vao.enableAttribute(2);
// UV 0 attribute // UV 0 attribute
vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0)); vao.setAttributeFloat<float>(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0));
vao.enableAttribute(3); vao.enableAttribute(3);
// UV 1 attribute // UV 1 attribute
vao.setAttributeFloat<float>(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1)); vao.setAttributeFloat<float>(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1));
vao.enableAttribute(4); vao.enableAttribute(4);
// UV 0 W-component attribute // UV 0 W-component attribute
vao.setAttributeFloat<float>(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w)); vao.setAttributeFloat<float>(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w));
vao.enableAttribute(5); vao.enableAttribute(5);
// View // View
vao.setAttributeFloat<float>(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view)); vao.setAttributeFloat<float>(6, 3, sizeof(Vertex), offsetof(Vertex, s.view));
vao.enableAttribute(6); vao.enableAttribute(6);
// UV 2 attribute // UV 2 attribute
vao.setAttributeFloat<float>(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2)); vao.setAttributeFloat<float>(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2));
vao.enableAttribute(7); vao.enableAttribute(7);
dummyVBO.create(); dummyVBO.create();
@ -725,9 +715,9 @@ void Renderer::setupBlending() {
}; };
if (!blendingEnabled) { if (!blendingEnabled) {
OpenGL::disableBlend(); gl.disableBlend();
} else { } else {
OpenGL::enableBlend(); gl.enableBlend();
// Get blending equations // Get blending equations
const u32 blendControl = regs[PICA::InternalRegs::BlendFunc]; const u32 blendControl = regs[PICA::InternalRegs::BlendFunc];
@ -783,8 +773,6 @@ void Renderer::setupTextureEnvState() {
glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs); glUniform1uiv(textureEnvCombinerLoc, 6, textureEnvCombinerRegs);
glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs); glUniform1uiv(textureEnvColorLoc, 6, textureEnvColourRegs);
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs); glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
glUniform1ui(textureEnvUpdateBufferLoc, regs[PICA::InternalRegs::TexEnvUpdateBuffer]);
glUniform1ui(textureEnvBufferColorLoc, regs[PICA::InternalRegs::TexEnvBufferColor]);
} }
void Renderer::bindTexturesToSlots() { void Renderer::bindTexturesToSlots() {
@ -815,21 +803,16 @@ void Renderer::bindTexturesToSlots() {
glActiveTexture(GL_TEXTURE0 + 3); glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
// Update the texture unit configuration uniform if it changed
const u32 texUnitConfig = regs[PICA::InternalRegs::TexUnitCfg];
if (oldTexUnitConfig != texUnitConfig) {
oldTexUnitConfig = texUnitConfig;
glUniform1ui(texUnitConfigLoc, texUnitConfig);
}
} }
void Renderer::updateLightingLUT(){
void Renderer::updateLightingLUT() {
gpu.lightingLUTDirty = false;
std::array<u16, GPU::LightingLutSize> u16_lightinglut; std::array<u16, GPU::LightingLutSize> u16_lightinglut;
for(int i = 0; i < gpu.lightingLUT.size(); i++){ for (int i = 0; i < gpu.lightingLUT.size(); i++) {
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
u16_lightinglut[i] = value * 65535 / 4095; u16_lightinglut[i] = value * 65535 / 4095;
} }
glActiveTexture(GL_TEXTURE0 + 3); glActiveTexture(GL_TEXTURE0 + 3);
glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
@ -839,31 +822,19 @@ void Renderer::updateLightingLUT(){
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
gpu.lightingLUTDirty = false;
} }
void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) { void Renderer::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
static constexpr std::array<OpenGL::Primitives, 4> primTypes = { static constexpr std::array<OpenGL::Primitives, 4> primTypes = {
OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle
}; };
const auto primitiveTopology = primTypes[static_cast<usize>(primType)]; const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
// TODO: We should implement a GL state tracker that tracks settings like scissor, blending, bound program, etc gl.disableScissor();
// This way if we attempt to eg do multiple glEnable(GL_BLEND) calls in a row, it will say "Oh blending is already enabled" gl.bindVBO(vbo);
// And not actually perform the very expensive driver call for it gl.bindVAO(vao);
OpenGL::disableScissor(); gl.useProgram(triangleProgram);
vbo.bind();
vao.bind();
triangleProgram.use();
// Adjust alpha test if necessary
const u32 alphaControl = regs[PICA::InternalRegs::AlphaTestConfig];
if (alphaControl != oldAlphaControl) {
oldAlphaControl = alphaControl;
glUniform1ui(alphaControlLoc, alphaControl);
}
OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled
if (regs[PICA::InternalRegs::ClipEnable] & 1) { if (regs[PICA::InternalRegs::ClipEnable] & 1) {
@ -879,7 +850,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
const bool depthWriteEnable = getBit<12>(depthControl); const bool depthWriteEnable = getBit<12>(depthControl);
const int depthFunc = getBits<4, 3>(depthControl); const int depthFunc = getBits<4, 3>(depthControl);
const int colourMask = getBits<8, 4>(depthControl); const int colourMask = getBits<8, 4>(depthControl);
glColorMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8); gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8);
static constexpr std::array<GLenum, 8> depthModes = { static constexpr std::array<GLenum, 8> depthModes = {
GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL
@ -908,9 +879,9 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
setupTextureEnvState(); setupTextureEnvState();
bindTexturesToSlots(); bindTexturesToSlots();
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47) // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]); glUniform1uiv(picaRegLoc, 0x200 - 0x48, &regs[0x48]);
if (gpu.lightingLUTDirty) { if (gpu.lightingLUTDirty) {
updateLightingLUT(); updateLightingLUT();
@ -924,18 +895,18 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
// Note: The code below must execute after we've bound the colour buffer & its framebuffer // Note: The code below must execute after we've bound the colour buffer & its framebuffer
// Because it attaches a depth texture to the aforementioned colour buffer // Because it attaches a depth texture to the aforementioned colour buffer
if (depthEnable) { if (depthEnable) {
OpenGL::enableDepth(); gl.enableDepth();
glDepthFunc(depthModes[depthFunc]); gl.setDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE);
glDepthMask(depthWriteEnable ? GL_TRUE : GL_FALSE); gl.setDepthFunc(depthModes[depthFunc]);
bindDepthBuffer(); bindDepthBuffer();
} else { } else {
if (depthWriteEnable) { if (depthWriteEnable) {
OpenGL::enableDepth(); gl.enableDepth();
glDepthFunc(GL_ALWAYS); gl.setDepthMask(GL_TRUE);
glDepthMask(GL_TRUE); gl.setDepthFunc(GL_ALWAYS);
bindDepthBuffer(); bindDepthBuffer();
} else { } else {
OpenGL::disableDepth(); gl.disableDepth();
} }
} }
@ -947,7 +918,7 @@ constexpr u32 topScreenBuffer = 0x1f000000;
constexpr u32 bottomScreenBuffer = 0x1f05dc00; constexpr u32 bottomScreenBuffer = 0x1f05dc00;
void Renderer::display() { void Renderer::display() {
OpenGL::disableScissor(); gl.disableScissor();
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
screenFramebuffer.bind(OpenGL::ReadFramebuffer); screenFramebuffer.bind(OpenGL::ReadFramebuffer);
@ -1038,12 +1009,15 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
tex.bind(); tex.bind();
screenFramebuffer.bind(OpenGL::DrawFramebuffer); screenFramebuffer.bind(OpenGL::DrawFramebuffer);
OpenGL::disableBlend(); gl.disableBlend();
OpenGL::disableDepth(); gl.disableDepth();
OpenGL::disableScissor(); gl.disableScissor();
gl.setColourMask(true, true, true, true);
gl.useProgram(displayProgram);
gl.bindVAO(dummyVAO);
OpenGL::disableClipPlane(0); OpenGL::disableClipPlane(0);
OpenGL::disableClipPlane(1); OpenGL::disableClipPlane(1);
displayProgram.use();
// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture // Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
// We consider output gap == 320 to mean bottom, and anything else to mean top // We consider output gap == 320 to mean bottom, and anything else to mean top
@ -1053,6 +1027,5 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
} }
dummyVAO.bind();
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
} }

View file

@ -1,6 +1,6 @@
#include "emulator.hpp" #include "emulator.hpp"
Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory), memory(cpu.getTicksRef()) { Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl), memory(cpu.getTicksRef()) {
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) { if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) {
Helpers::panic("Failed to initialize SDL2"); Helpers::panic("Failed to initialize SDL2");
} }
@ -326,3 +326,9 @@ bool Emulator::loadELF(std::ifstream& file) {
} }
return true; return true;
} }
// Reset our graphics context and initialize the GPU's graphics context
// The GL state manager is reset first (gl.reset()), and only then is gpu.initGraphicsContext() invoked
void Emulator::initGraphicsContext() {
gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL
gpu.initGraphicsContext();
}

53
src/gl_state.cpp Normal file
View file

@ -0,0 +1,53 @@
#include "gl_state.hpp"
// Put blending back into its reset state: turn it off through the OpenGL wrapper
// and record that fact in the cached blendEnabled flag
void GLStateManager::resetBlend() {
    OpenGL::disableBlend();
    blendEnabled = false;
}
// Reset the colour mask: set the cached red/green/blue/alpha mask flags to true
// and forward those same values to OpenGL::setColourMask
void GLStateManager::resetColourMask() {
    redMask = true;
    greenMask = true;
    blueMask = true;
    alphaMask = true;

    OpenGL::setColourMask(redMask, greenMask, blueMask, alphaMask);
}
// Reset all depth-related state. Each cached field is updated right next to the
// OpenGL wrapper call that applies the same value.
void GLStateManager::resetDepth() {
    // Depth testing: disabled
    depthEnabled = false;
    OpenGL::disableDepth();

    // Depth mask: true
    depthMask = true;
    OpenGL::setDepthMask(true);

    // Depth comparison function: "less"
    depthFunc = GL_LESS;
    OpenGL::setDepthFunc(OpenGL::DepthFunc::Less);
}
// Reset scissor state: disable the scissor test, set the scissor rectangle to (0, 0, 0, 0)
// and mark scissoring as disabled in the cached flag
void GLStateManager::resetScissor() {
    OpenGL::disableScissor();
    OpenGL::setScissor(0, 0, 0, 0);
    scissorEnabled = false;
}
// Bind vertex array object 0 and record 0 as the currently bound VAO
void GLStateManager::resetVAO() {
    glBindVertexArray(0);
    boundVAO = 0;
}
// Bind buffer 0 to GL_ARRAY_BUFFER and record 0 as the currently bound VBO
void GLStateManager::resetVBO() {
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    boundVBO = 0;
}
// Switch to program 0 via glUseProgram and record 0 as the current program
void GLStateManager::resetProgram() {
    glUseProgram(0);
    currentProgram = 0;
}
// Reset every piece of state handled by the individual reset helpers, in this order:
// blending, colour mask, depth, VAO binding, VBO binding, current program, scissor
void GLStateManager::reset() {
resetBlend();
resetColourMask();
resetDepth();
resetVAO();
resetVBO();
resetProgram();
resetScissor();
}