Hook up vertex shaders to shader cache

This commit is contained in:
wheremyfoodat 2024-07-25 04:04:41 +03:00
parent 251ff5ee49
commit 2f4c169cad
10 changed files with 256 additions and 77 deletions

View file

@ -123,27 +123,38 @@ void GPU::reset() {
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
// And how the vertex shader is going to be executed: hardware shaders, shader JIT or interpreter (second template parameter)
void GPU::drawArrays(bool indexed) {
renderer->prepareForDraw(shaderUnit, false);
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
const bool hwShaders = renderer->prepareForDraw(shaderUnit, false);
if (indexed) {
if (shaderJITEnabled)
drawArrays<true, true>();
else
drawArrays<true, false>();
if (hwShaders) {
if (indexed) {
drawArrays<true, ShaderExecMode::Hardware>();
} else {
drawArrays<false, ShaderExecMode::Hardware>();
}
} else {
if (shaderJITEnabled)
drawArrays<false, true>();
else
drawArrays<false, false>();
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
if (indexed) {
if (shaderJITEnabled) {
drawArrays<true, ShaderExecMode::JIT>();
} else {
drawArrays<true, ShaderExecMode::Interpreter>();
}
} else {
if (shaderJITEnabled) {
drawArrays<false, ShaderExecMode::JIT>();
} else {
drawArrays<false, ShaderExecMode::Interpreter>();
}
}
}
}
static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
template <bool indexed, bool useShaderJIT>
template <bool indexed, ShaderExecMode mode>
void GPU::drawArrays() {
if constexpr (useShaderJIT) {
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.prepare(shaderUnit.vs);
}
@ -322,29 +333,38 @@ void GPU::drawArrays() {
}
}
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
// Ie it might attribute #0 to v2, #1 to v7, etc
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
}
// Running shader on the CPU instead of the GPU
if constexpr (mode == ShaderExecMode::Interpreter || mode == ShaderExecMode::JIT) {
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
// Ie it might map attribute #0 to v2, #1 to v7, etc
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
}
if constexpr (useShaderJIT) {
shaderJIT.run(shaderUnit.vs);
} else {
shaderUnit.vs.run();
}
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.run(shaderUnit.vs);
} else {
shaderUnit.vs.run();
}
PICA::Vertex& out = vertices[i];
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
PICA::Vertex& out = vertices[i];
// Map shader outputs to fixed function properties
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
for (int i = 0; i < totalShaderOutputs; i++) {
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
out.raw[mapping] = vsOutputRegisters[i][j];
for (int j = 0; j < 4; j++) { // pls unroll
const u32 mapping = (config >> (j * 8)) & 0x1F;
out.raw[mapping] = vsOutputRegisters[i][j];
}
}
} else { // Using hw shaders and running the shader on the GPU, just write the inputs to the attribute buffer directly
PICA::Vertex& out = vertices[i];
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&out.raw[mapping], &currentAttributes[j], sizeof(vec4f));
}
}
}

View file

@ -72,11 +72,6 @@ std::string FragmentGenerator::getDefaultVertexShader() {
out float gl_ClipDistance[2];
#endif
vec4 abgr8888ToVec4(uint abgr) {
const float scale = 1.0 / 255.0;
return scale * vec4(float(abgr & 0xffu), float((abgr >> 8) & 0xffu), float((abgr >> 16) & 0xffu), float(abgr >> 24));
}
void main() {
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
@ -677,4 +672,58 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf
shader += "vec2 value = texelFetch(u_tex_luts, ivec2(int(clamped_index), 24), 0).rg;"; // fog LUT is past the light LUTs
shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
}
// Builds the final GLSL vertex shader used when vertex shaders run on the host GPU.
// picaSource is used as the start of the shader; the appended main() calls pica_shader_main()
// and reads output_registers[0..6], so picaSource is expected to define both.
// When the target API is GLES, a USING_GLES define is appended before the epilogue so the
// gl_ClipDistance declaration is compiled out.
// usingUbershader: not implemented yet; panics and hands back picaSource untouched.
std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, bool usingUbershader) {
	if (usingUbershader) {
		Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader");
		return picaSource;
	}

	// TODO: Uniforms and don't hardcode fixed-function semantic indices...
	// GLSL epilogue: declares the varyings consumed by the fragment stage and a main() that
	// runs the PICA shader, then maps its output registers onto those varyings.
	static constexpr const char* epilogue = R"(
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;
#ifndef USING_GLES
out float gl_ClipDistance[2];
#endif
void main() {
pica_shader_main();
vec4 a_coords = output_registers[0];
vec4 a_vertexColour = output_registers[1];
vec2 a_texcoord0 = output_registers[2].xy;
float a_texcoord0_w = output_registers[2].w;
vec2 a_texcoord1 = output_registers[3].xy;
vec2 a_texcoord2 = output_registers[4].xy;
vec3 a_view = output_registers[5].xyz;
vec4 a_quaternion = output_registers[6];
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
v_colour = min(colourAbs, vec4(1.f));
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_quaternion = a_quaternion;
#ifndef USING_GLES
//gl_ClipDistance[0] = -a_coords.z;
//gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
})";

	std::string shader = picaSource;
	const bool targetingGLES = (api == API::GLES);
	if (targetingGLES) {
		shader += "\n#define USING_GLES\n";
	}

	shader += epilogue;
	return shader;
}