mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-12 09:09:47 +12:00
Hook up vertex shaders to shader cache
This commit is contained in:
parent
251ff5ee49
commit
2f4c169cad
10 changed files with 256 additions and 77 deletions
|
@ -256,6 +256,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
||||||
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
|
include/audio/miniaudio_device.hpp include/ring_buffer.hpp include/bitfield.hpp include/audio/dsp_shared_mem.hpp
|
||||||
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
|
include/audio/hle_core.hpp include/capstone.hpp include/audio/aac.hpp include/PICA/pica_frag_config.hpp
|
||||||
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
|
include/PICA/pica_frag_uniforms.hpp include/PICA/shader_gen_types.hpp include/PICA/shader_decompiler.hpp
|
||||||
|
include/PICA/pica_vert_config.hpp
|
||||||
)
|
)
|
||||||
|
|
||||||
cmrc_add_resource_library(
|
cmrc_add_resource_library(
|
||||||
|
|
|
@ -13,6 +13,12 @@
|
||||||
#include "memory.hpp"
|
#include "memory.hpp"
|
||||||
#include "renderer.hpp"
|
#include "renderer.hpp"
|
||||||
|
|
||||||
|
enum class ShaderExecMode {
|
||||||
|
Interpreter, // Interpret shaders on the CPU
|
||||||
|
JIT, // Recompile shaders to CPU machine code
|
||||||
|
Hardware, // Recompile shaders to host shaders and run them on the GPU
|
||||||
|
};
|
||||||
|
|
||||||
class GPU {
|
class GPU {
|
||||||
static constexpr u32 regNum = 0x300;
|
static constexpr u32 regNum = 0x300;
|
||||||
static constexpr u32 extRegNum = 0x1000;
|
static constexpr u32 extRegNum = 0x1000;
|
||||||
|
@ -45,7 +51,7 @@ class GPU {
|
||||||
uint immediateModeVertIndex;
|
uint immediateModeVertIndex;
|
||||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||||
|
|
||||||
template <bool indexed, bool useShaderJIT>
|
template <bool indexed, ShaderExecMode mode>
|
||||||
void drawArrays();
|
void drawArrays();
|
||||||
|
|
||||||
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
||||||
|
|
31
include/PICA/pica_vert_config.hpp
Normal file
31
include/PICA/pica_vert_config.hpp
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
#pragma once
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "PICA/pica_hash.hpp"
|
||||||
|
#include "PICA/regs.hpp"
|
||||||
|
#include "bitfield.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
|
|
||||||
|
namespace PICA {
|
||||||
|
// Configuration struct used
|
||||||
|
struct VertConfig {
|
||||||
|
PICAHash::HashType shaderHash;
|
||||||
|
PICAHash::HashType opdescHash;
|
||||||
|
u32 entrypoint;
|
||||||
|
bool usingUbershader;
|
||||||
|
|
||||||
|
bool operator==(const VertConfig& config) const {
|
||||||
|
// Hash function and equality operator required by std::unordered_map
|
||||||
|
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace PICA
|
||||||
|
|
||||||
|
// Override std::hash for our vertex config class
|
||||||
|
template <>
|
||||||
|
struct std::hash<PICA::VertConfig> {
|
||||||
|
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||||
|
};
|
|
@ -107,6 +107,11 @@ class PICAShader {
|
||||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||||
alignas(16) std::array<vec4f, 16> outputs;
|
alignas(16) std::array<vec4f, 16> outputs;
|
||||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||||
|
|
||||||
|
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||||
|
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||||
|
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||||
|
using Hash = PICAHash::HashType;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::array<u32, 128> operandDescriptors;
|
std::array<u32, 128> operandDescriptors;
|
||||||
|
@ -125,11 +130,6 @@ class PICAShader {
|
||||||
std::array<CallInfo, 4> callInfo;
|
std::array<CallInfo, 4> callInfo;
|
||||||
ShaderType type;
|
ShaderType type;
|
||||||
|
|
||||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
|
||||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
|
||||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
|
||||||
using Hash = PICAHash::HashType;
|
|
||||||
|
|
||||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,8 @@ namespace PICA::ShaderGen {
|
||||||
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
||||||
std::string generate(const PICA::FragmentConfig& config);
|
std::string generate(const PICA::FragmentConfig& config);
|
||||||
std::string getDefaultVertexShader();
|
std::string getDefaultVertexShader();
|
||||||
|
// For when PICA shader acceleration is enabled. Turn the PICA shader source into a proper vertex shader
|
||||||
|
std::string getVertexShaderAccelerated(const std::string& picaSource, bool usingUbershader);
|
||||||
|
|
||||||
void setTarget(API api, Language language) {
|
void setTarget(API api, Language language) {
|
||||||
this->api = api;
|
this->api = api;
|
||||||
|
|
|
@ -82,7 +82,8 @@ class Renderer {
|
||||||
// This function is called on every draw call before parsing vertex data.
|
// This function is called on every draw call before parsing vertex data.
|
||||||
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
|
// It is responsible for things like looking up which vertex/fragment shaders to use, recompiling them if they don't exist, choosing between
|
||||||
// ubershaders and shadergen, and so on.
|
// ubershaders and shadergen, and so on.
|
||||||
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {}
|
// Returns whether this draw is eligible for using hardware-accelerated shaders or if shaders should run on the CPU
|
||||||
|
virtual bool prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) { return false; }
|
||||||
|
|
||||||
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
|
// Functions for initializing the graphics context for the Qt frontend, where we don't have the convenience of SDL_Window
|
||||||
#ifdef PANDA3DS_FRONTEND_QT
|
#ifdef PANDA3DS_FRONTEND_QT
|
||||||
|
|
|
@ -3,11 +3,14 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "PICA/float_types.hpp"
|
#include "PICA/float_types.hpp"
|
||||||
#include "PICA/pica_frag_config.hpp"
|
#include "PICA/pica_frag_config.hpp"
|
||||||
|
#include "PICA/pica_vert_config.hpp"
|
||||||
#include "PICA/pica_hash.hpp"
|
#include "PICA/pica_hash.hpp"
|
||||||
#include "PICA/pica_vertex.hpp"
|
#include "PICA/pica_vertex.hpp"
|
||||||
#include "PICA/regs.hpp"
|
#include "PICA/regs.hpp"
|
||||||
|
@ -52,6 +55,11 @@ class RendererGL final : public Renderer {
|
||||||
float oldDepthScale = -1.0;
|
float oldDepthScale = -1.0;
|
||||||
float oldDepthOffset = 0.0;
|
float oldDepthOffset = 0.0;
|
||||||
bool oldDepthmapEnable = false;
|
bool oldDepthmapEnable = false;
|
||||||
|
// Set by prepareForDraw, tells us whether the current draw is using a hw-accelerated shader
|
||||||
|
bool usingAcceleratedShader = false;
|
||||||
|
|
||||||
|
// Cached pointer to the current vertex shader when using HW accelerated shaders
|
||||||
|
OpenGL::Shader* generatedVertexShader = nullptr;
|
||||||
|
|
||||||
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
||||||
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
||||||
|
@ -74,7 +82,38 @@ class RendererGL final : public Renderer {
|
||||||
OpenGL::Program program;
|
OpenGL::Program program;
|
||||||
uint uboBinding;
|
uint uboBinding;
|
||||||
};
|
};
|
||||||
std::unordered_map<PICA::FragmentConfig, CachedProgram> shaderCache;
|
|
||||||
|
struct ShaderCache {
|
||||||
|
std::unordered_map<PICA::VertConfig, std::optional<OpenGL::Shader>> vertexShaderCache;
|
||||||
|
std::unordered_map<PICA::FragmentConfig, OpenGL::Shader> fragmentShaderCache;
|
||||||
|
|
||||||
|
// Program cache indexed by GLuints for the vertex and fragment shader to use
|
||||||
|
// Top 32 bits are the vertex shader GLuint, bottom 32 bits are the fs GLuint
|
||||||
|
std::unordered_map<u64, CachedProgram> programCache;
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
for (auto& it : programCache) {
|
||||||
|
CachedProgram& cachedProgram = it.second;
|
||||||
|
cachedProgram.program.free();
|
||||||
|
glDeleteBuffers(1, &cachedProgram.uboBinding);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& it : vertexShaderCache) {
|
||||||
|
if (it.second.has_value()) {
|
||||||
|
it.second->free();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& it : fragmentShaderCache) {
|
||||||
|
it.second.free();
|
||||||
|
}
|
||||||
|
|
||||||
|
programCache.clear();
|
||||||
|
vertexShaderCache.clear();
|
||||||
|
fragmentShaderCache.clear();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ShaderCache shaderCache;
|
||||||
|
|
||||||
OpenGL::Framebuffer getColourFBO();
|
OpenGL::Framebuffer getColourFBO();
|
||||||
OpenGL::Texture getTexture(Texture& tex);
|
OpenGL::Texture getTexture(Texture& tex);
|
||||||
|
@ -109,14 +148,13 @@ class RendererGL final : public Renderer {
|
||||||
virtual bool supportsShaderReload() override { return true; }
|
virtual bool supportsShaderReload() override { return true; }
|
||||||
virtual std::string getUbershader() override;
|
virtual std::string getUbershader() override;
|
||||||
virtual void setUbershader(const std::string& shader) override;
|
virtual void setUbershader(const std::string& shader) override;
|
||||||
virtual void prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) override;
|
virtual bool prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) override;
|
||||||
|
|
||||||
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
||||||
|
|
||||||
// Note: The caller is responsible for deleting the currently bound FBO before calling this
|
// Note: The caller is responsible for deleting the currently bound FBO before calling this
|
||||||
void setFBO(uint handle) { screenFramebuffer.m_handle = handle; }
|
void setFBO(uint handle) { screenFramebuffer.m_handle = handle; }
|
||||||
void resetStateManager() { gl.reset(); }
|
void resetStateManager() { gl.reset(); }
|
||||||
void clearShaderCache();
|
|
||||||
void initUbershader(OpenGL::Program& program);
|
void initUbershader(OpenGL::Program& program);
|
||||||
|
|
||||||
#ifdef PANDA3DS_FRONTEND_QT
|
#ifdef PANDA3DS_FRONTEND_QT
|
||||||
|
|
|
@ -123,27 +123,38 @@ void GPU::reset() {
|
||||||
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
|
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
|
||||||
// And whether we are going to use the shader JIT (second template parameter)
|
// And whether we are going to use the shader JIT (second template parameter)
|
||||||
void GPU::drawArrays(bool indexed) {
|
void GPU::drawArrays(bool indexed) {
|
||||||
renderer->prepareForDraw(shaderUnit, false);
|
const bool hwShaders = renderer->prepareForDraw(shaderUnit, false);
|
||||||
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
|
|
||||||
|
|
||||||
if (indexed) {
|
if (hwShaders) {
|
||||||
if (shaderJITEnabled)
|
if (indexed) {
|
||||||
drawArrays<true, true>();
|
drawArrays<true, ShaderExecMode::Hardware>();
|
||||||
else
|
} else {
|
||||||
drawArrays<true, false>();
|
drawArrays<false, ShaderExecMode::Hardware>();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (shaderJITEnabled)
|
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
|
||||||
drawArrays<false, true>();
|
|
||||||
else
|
if (indexed) {
|
||||||
drawArrays<false, false>();
|
if (shaderJITEnabled) {
|
||||||
|
drawArrays<true, ShaderExecMode::JIT>();
|
||||||
|
} else {
|
||||||
|
drawArrays<true, ShaderExecMode::Interpreter>();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (shaderJITEnabled) {
|
||||||
|
drawArrays<false, ShaderExecMode::JIT>();
|
||||||
|
} else {
|
||||||
|
drawArrays<false, ShaderExecMode::Interpreter>();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
|
static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
|
||||||
|
|
||||||
template <bool indexed, bool useShaderJIT>
|
template <bool indexed, ShaderExecMode mode>
|
||||||
void GPU::drawArrays() {
|
void GPU::drawArrays() {
|
||||||
if constexpr (useShaderJIT) {
|
if constexpr (mode == ShaderExecMode::JIT) {
|
||||||
shaderJIT.prepare(shaderUnit.vs);
|
shaderJIT.prepare(shaderUnit.vs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -322,29 +333,38 @@ void GPU::drawArrays() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
|
// Running shader on the CPU instead of the GPU
|
||||||
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
|
if constexpr (mode == ShaderExecMode::Interpreter || mode == ShaderExecMode::JIT) {
|
||||||
// Ie it might attribute #0 to v2, #1 to v7, etc
|
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
|
||||||
for (int j = 0; j < totalAttribCount; j++) {
|
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
|
||||||
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
|
// Ie it might map attribute #0 to v2, #1 to v7, etc
|
||||||
std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f));
|
for (int j = 0; j < totalAttribCount; j++) {
|
||||||
}
|
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
|
||||||
|
std::memcpy(&shaderUnit.vs.inputs[mapping], ¤tAttributes[j], sizeof(vec4f));
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (useShaderJIT) {
|
if constexpr (mode == ShaderExecMode::JIT) {
|
||||||
shaderJIT.run(shaderUnit.vs);
|
shaderJIT.run(shaderUnit.vs);
|
||||||
} else {
|
} else {
|
||||||
shaderUnit.vs.run();
|
shaderUnit.vs.run();
|
||||||
}
|
}
|
||||||
|
|
||||||
PICA::Vertex& out = vertices[i];
|
PICA::Vertex& out = vertices[i];
|
||||||
// Map shader outputs to fixed function properties
|
// Map shader outputs to fixed function properties
|
||||||
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||||
for (int i = 0; i < totalShaderOutputs; i++) {
|
for (int i = 0; i < totalShaderOutputs; i++) {
|
||||||
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
|
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
|
||||||
|
|
||||||
for (int j = 0; j < 4; j++) { // pls unroll
|
for (int j = 0; j < 4; j++) { // pls unroll
|
||||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||||
out.raw[mapping] = vsOutputRegisters[i][j];
|
out.raw[mapping] = vsOutputRegisters[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // Using hw shaders and running the shader on the GPU, just write the inputs to the attribute buffer directly
|
||||||
|
PICA::Vertex& out = vertices[i];
|
||||||
|
for (int j = 0; j < totalAttribCount; j++) {
|
||||||
|
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
|
||||||
|
std::memcpy(&out.raw[mapping], ¤tAttributes[j], sizeof(vec4f));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,11 +72,6 @@ std::string FragmentGenerator::getDefaultVertexShader() {
|
||||||
out float gl_ClipDistance[2];
|
out float gl_ClipDistance[2];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
vec4 abgr8888ToVec4(uint abgr) {
|
|
||||||
const float scale = 1.0 / 255.0;
|
|
||||||
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
|
|
||||||
}
|
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
gl_Position = a_coords;
|
gl_Position = a_coords;
|
||||||
vec4 colourAbs = abs(a_vertexColour);
|
vec4 colourAbs = abs(a_vertexColour);
|
||||||
|
@ -677,4 +672,58 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf
|
||||||
shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs
|
shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs
|
||||||
shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
|
shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
|
||||||
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
|
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, bool usingUbershader) {
|
||||||
|
if (usingUbershader) {
|
||||||
|
Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader");
|
||||||
|
return picaSource;
|
||||||
|
} else {
|
||||||
|
// TODO: Uniforms and don't hardcode fixed-function semantic indices...
|
||||||
|
std::string ret = picaSource;
|
||||||
|
if (api == API::GLES) {
|
||||||
|
ret += "\n#define USING_GLES\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
ret += R"(
|
||||||
|
out vec4 v_quaternion;
|
||||||
|
out vec4 v_colour;
|
||||||
|
out vec3 v_texcoord0;
|
||||||
|
out vec2 v_texcoord1;
|
||||||
|
out vec3 v_view;
|
||||||
|
out vec2 v_texcoord2;
|
||||||
|
|
||||||
|
#ifndef USING_GLES
|
||||||
|
out float gl_ClipDistance[2];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
pica_shader_main();
|
||||||
|
vec4 a_coords = output_registers[0];
|
||||||
|
vec4 a_vertexColour = output_registers[1];
|
||||||
|
vec2 a_texcoord0 = output_registers[2].xy;
|
||||||
|
float a_texcoord0_w = output_registers[2].w;
|
||||||
|
vec2 a_texcoord1 = output_registers[3].xy;
|
||||||
|
vec2 a_texcoord2 = output_registers[4].xy;
|
||||||
|
vec3 a_view = output_registers[5].xyz;
|
||||||
|
vec4 a_quaternion = output_registers[6];
|
||||||
|
|
||||||
|
gl_Position = a_coords;
|
||||||
|
vec4 colourAbs = abs(a_vertexColour);
|
||||||
|
v_colour = min(colourAbs, vec4(1.f));
|
||||||
|
|
||||||
|
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
|
||||||
|
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
|
||||||
|
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
|
||||||
|
v_view = a_view;
|
||||||
|
v_quaternion = a_quaternion;
|
||||||
|
|
||||||
|
#ifndef USING_GLES
|
||||||
|
//gl_ClipDistance[0] = -a_coords.z;
|
||||||
|
//gl_ClipDistance[1] = dot(clipCoords, a_coords);
|
||||||
|
#endif
|
||||||
|
})";
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -25,7 +25,7 @@ void RendererGL::reset() {
|
||||||
colourBufferCache.reset();
|
colourBufferCache.reset();
|
||||||
textureCache.reset();
|
textureCache.reset();
|
||||||
|
|
||||||
clearShaderCache();
|
shaderCache.clear();
|
||||||
|
|
||||||
// Init the colour/depth buffer settings to some random defaults on reset
|
// Init the colour/depth buffer settings to some random defaults on reset
|
||||||
colourBufferLoc = 0;
|
colourBufferLoc = 0;
|
||||||
|
@ -788,18 +788,24 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
|
||||||
|
|
||||||
PICA::FragmentConfig fsConfig(regs);
|
PICA::FragmentConfig fsConfig(regs);
|
||||||
|
|
||||||
CachedProgram& programEntry = shaderCache[fsConfig];
|
OpenGL::Shader& fragShader = shaderCache.fragmentShaderCache[fsConfig];
|
||||||
|
if (!fragShader.exists()) {
|
||||||
|
std::string fs = fragShaderGen.generate(fsConfig);
|
||||||
|
fragShader.create({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the handle of the current vertex shader
|
||||||
|
OpenGL::Shader& vertexShader = usingAcceleratedShader ? *generatedVertexShader : defaultShadergenVs;
|
||||||
|
// And form the key for looking up a shader program
|
||||||
|
const u64 programKey = (u64(vertexShader.handle()) << 32) | u64(fragShader.handle());
|
||||||
|
|
||||||
|
CachedProgram& programEntry = shaderCache.programCache[programKey];
|
||||||
OpenGL::Program& program = programEntry.program;
|
OpenGL::Program& program = programEntry.program;
|
||||||
|
|
||||||
if (!program.exists()) {
|
if (!program.exists()) {
|
||||||
std::string fs = fragShaderGen.generate(fsConfig);
|
program.create({vertexShader, fragShader});
|
||||||
|
|
||||||
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
|
|
||||||
program.create({defaultShadergenVs, fragShader});
|
|
||||||
gl.useProgram(program);
|
gl.useProgram(program);
|
||||||
|
|
||||||
fragShader.free();
|
|
||||||
|
|
||||||
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
|
||||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
|
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
|
||||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
||||||
|
@ -904,15 +910,8 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
|
bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
|
||||||
std::string vertShaderSource = PICA::ShaderGen::decompileShader(
|
// First we figure out if we will be using an ubershader
|
||||||
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
|
|
||||||
);
|
|
||||||
|
|
||||||
OpenGL::Shader vert({vertShaderSource.c_str(), vertShaderSource.size()}, OpenGL::Vertex);
|
|
||||||
//triangleProgram.create({vert, frag});
|
|
||||||
std::cout << vertShaderSource << "\n";
|
|
||||||
|
|
||||||
bool usingUbershader = emulatorConfig->useUbershaders;
|
bool usingUbershader = emulatorConfig->useUbershaders;
|
||||||
if (usingUbershader) {
|
if (usingUbershader) {
|
||||||
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
|
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
|
||||||
|
@ -925,6 +924,46 @@ void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Then we figure out if we will use hw accelerated shaders, and try to fetch our shader
|
||||||
|
// TODO: Ubershader support for accelerated shaders
|
||||||
|
usingAcceleratedShader = emulatorConfig->accelerateShaders && !isImmediateMode && !usingUbershader;
|
||||||
|
|
||||||
|
if (usingAcceleratedShader) {
	// Build the cache key for the current vertex shader out of its code hash, operand descriptor hash and entrypoint
	auto shaderCodeHash = shaderUnit.vs.getCodeHash();
	auto opdescHash = shaderUnit.vs.getOpdescHash();
	auto vertexConfig = PICA::VertConfig{
		.shaderHash = shaderCodeHash,
		.opdescHash = opdescHash,
		.entrypoint = shaderUnit.vs.entrypoint,
		.usingUbershader = usingUbershader,
	};

	// operator[] default-constructs an empty optional the first time a config is seen
	std::optional<OpenGL::Shader>& shader = shaderCache.vertexShaderCache[vertexConfig];
	// If the optional is empty, we have never tried to recompile the shader before. Try to recompile it and see if it works.
	if (!shader.has_value()) {
		// Initialize shader to a "null" shader (handle == 0)
		// This must engage the optional (emplace) rather than assign through operator*: dereferencing an empty optional is
		// undefined behaviour and would leave has_value() false, so the shader would get recompiled on every single draw
		shader.emplace();

		std::string picaShaderSource = PICA::ShaderGen::decompileShader(
			shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
		);

		// Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload
		// it to the GPU
		if (!picaShaderSource.empty()) {
			std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, usingUbershader);
			shader->create({vertexShaderSource}, OpenGL::Vertex);
		}
	}

	// Shader generation did not work out, so set usingAcceleratedShader to false
	if (!shader->exists()) {
		usingAcceleratedShader = false;
	} else {
		// Cache a pointer to the shader stored in the map so that the program lookup can use it for this draw
		generatedVertexShader = &(*shader);
	}
}
|
||||||
|
|
||||||
if (usingUbershader) {
|
if (usingUbershader) {
|
||||||
gl.useProgram(triangleProgram);
|
gl.useProgram(triangleProgram);
|
||||||
} else {
|
} else {
|
||||||
|
@ -958,6 +997,8 @@ void RendererGL::prepareForDraw(ShaderUnit& shaderUnit, bool isImmediateMode) {
|
||||||
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]);
|
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, ®s[0x48]);
|
||||||
setupUbershaderTexEnv();
|
setupUbershaderTexEnv();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return usingAcceleratedShader;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererGL::screenshot(const std::string& name) {
|
void RendererGL::screenshot(const std::string& name) {
|
||||||
|
@ -985,22 +1026,12 @@ void RendererGL::screenshot(const std::string& name) {
|
||||||
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
|
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererGL::clearShaderCache() {
|
|
||||||
for (auto& shader : shaderCache) {
|
|
||||||
CachedProgram& cachedProgram = shader.second;
|
|
||||||
cachedProgram.program.free();
|
|
||||||
glDeleteBuffers(1, &cachedProgram.uboBinding);
|
|
||||||
}
|
|
||||||
|
|
||||||
shaderCache.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
void RendererGL::deinitGraphicsContext() {
|
void RendererGL::deinitGraphicsContext() {
|
||||||
// Invalidate all surface caches since they'll no longer be valid
|
// Invalidate all surface caches since they'll no longer be valid
|
||||||
textureCache.reset();
|
textureCache.reset();
|
||||||
depthBufferCache.reset();
|
depthBufferCache.reset();
|
||||||
colourBufferCache.reset();
|
colourBufferCache.reset();
|
||||||
clearShaderCache();
|
shaderCache.clear();
|
||||||
|
|
||||||
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
|
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
|
||||||
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
|
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
|
||||||
|
@ -1048,4 +1079,4 @@ void RendererGL::initUbershader(OpenGL::Program& program) {
|
||||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
|
||||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
|
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
|
||||||
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
|
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
|
||||||
}
|
}
|
Loading…
Add table
Reference in a new issue