mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 22:55:40 +12:00
Fix control flow analysis bug
This commit is contained in:
parent
0e7697dc67
commit
e332ab2e58
6 changed files with 54 additions and 14 deletions
|
@ -14,6 +14,7 @@ namespace PICA {
|
||||||
u32 size;
|
u32 size;
|
||||||
u32 stride;
|
u32 stride;
|
||||||
|
|
||||||
|
u8 inputReg; // Which input reg should this attribute go to in the vertex shader?
|
||||||
u8 type;
|
u8 type;
|
||||||
u8 componentCount;
|
u8 componentCount;
|
||||||
bool fixed;
|
bool fixed;
|
||||||
|
@ -27,6 +28,7 @@ namespace PICA {
|
||||||
// Minimum and maximum index in the index buffer for a draw call
|
// Minimum and maximum index in the index buffer for a draw call
|
||||||
u16 minimumIndex, maximumIndex;
|
u16 minimumIndex, maximumIndex;
|
||||||
u32 totalAttribCount;
|
u32 totalAttribCount;
|
||||||
|
u32 enabledAttributeMask;
|
||||||
u32 vertexDataSize;
|
u32 vertexDataSize;
|
||||||
|
|
||||||
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
||||||
|
|
|
@ -70,6 +70,9 @@ class RendererGL final : public Renderer {
|
||||||
GLuint maximumIndex = 0;
|
GLuint maximumIndex = 0;
|
||||||
void* hwIndexBufferOffset = nullptr;
|
void* hwIndexBufferOffset = nullptr;
|
||||||
|
|
||||||
|
// When doing hw shaders, we cache which attributes are enabled in our VAO to avoid having to enable/disable all attributes on each draw
|
||||||
|
u32 previousAttributeMask = 0;
|
||||||
|
|
||||||
// Cached pointer to the current vertex shader when using HW accelerated shaders
|
// Cached pointer to the current vertex shader when using HW accelerated shaders
|
||||||
OpenGL::Shader* generatedVertexShader = nullptr;
|
OpenGL::Shader* generatedVertexShader = nullptr;
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
||||||
accel.indexed = indexed;
|
accel.indexed = indexed;
|
||||||
accel.totalAttribCount = totalAttribCount;
|
accel.totalAttribCount = totalAttribCount;
|
||||||
|
accel.enabledAttributeMask = 0;
|
||||||
|
|
||||||
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||||
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||||
|
@ -50,6 +51,8 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
|
const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
|
||||||
|
const u64 inputAttrCfg = getVertexShaderInputConfig();
|
||||||
|
|
||||||
u32 buffer = 0;
|
u32 buffer = 0;
|
||||||
u32 attrCount = 0;
|
u32 attrCount = 0;
|
||||||
accel.vertexDataSize = 0;
|
accel.vertexDataSize = 0;
|
||||||
|
@ -94,7 +97,11 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
||||||
|
|
||||||
// Size of each component based on the attribute type
|
// Size of each component based on the attribute type
|
||||||
static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4};
|
static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4};
|
||||||
|
const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf;
|
||||||
|
// Mark the attribute as enabled
|
||||||
|
accel.enabledAttributeMask |= 1 << inputReg;
|
||||||
|
|
||||||
|
attr.inputReg = inputReg;
|
||||||
attr.componentCount = size;
|
attr.componentCount = size;
|
||||||
attr.offset = attributeOffset;
|
attr.offset = attributeOffset;
|
||||||
attr.size = size * sizePerComponent[attribType];
|
attr.size = size * sizePerComponent[attribType];
|
||||||
|
@ -123,6 +130,9 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
||||||
attr.fixedValue[i] = fixedAttr[i].toFloat32();
|
attr.fixedValue[i] = fixedAttr[i].toFloat32();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf;
|
||||||
|
|
||||||
|
attr.inputReg = inputReg;
|
||||||
attrCount += 1;
|
attrCount += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,7 +79,7 @@ ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 e
|
||||||
|
|
||||||
// This opens up 2 parallel paths of execution
|
// This opens up 2 parallel paths of execution
|
||||||
auto branchTakenExit = analyzeFunction(shader, dest, end, labels);
|
auto branchTakenExit = analyzeFunction(shader, dest, end, labels);
|
||||||
auto branchNotTakenExit = analyzeFunction(shader, pc + 1, dest, labels);
|
auto branchNotTakenExit = analyzeFunction(shader, pc + 1, end, labels);
|
||||||
it->second = exitParallel(branchTakenExit, branchNotTakenExit);
|
it->second = exitParallel(branchTakenExit, branchNotTakenExit);
|
||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
@ -122,6 +122,7 @@ ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 e
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ShaderOpcodes::CALL: {
|
case ShaderOpcodes::CALL: {
|
||||||
const u32 num = instruction & 0xff;
|
const u32 num = instruction & 0xff;
|
||||||
const u32 dest = getBits<10, 12>(instruction);
|
const u32 dest = getBits<10, 12>(instruction);
|
||||||
|
|
|
@ -778,8 +778,6 @@ void main() {
|
||||||
gl_ClipDistance[1] = dot(clipCoords, a_coords);
|
gl_ClipDistance[1] = dot(clipCoords, a_coords);
|
||||||
#endif
|
#endif
|
||||||
})";
|
})";
|
||||||
|
|
||||||
std::cout << ret << "\n";
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
#include <stb_image_write.h>
|
#include <stb_image_write.h>
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
#include <cmrc/cmrc.hpp>
|
#include <cmrc/cmrc.hpp>
|
||||||
|
|
||||||
#include "PICA/float_types.hpp"
|
#include "PICA/float_types.hpp"
|
||||||
|
@ -987,7 +988,7 @@ bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration*
|
||||||
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
|
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint, PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
|
||||||
);
|
);
|
||||||
|
|
||||||
// Empty source means compilation error, if the source is not empty then we convert the rcompiled PICA code into a valid shader and upload
|
// Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload
|
||||||
// it to the GPU
|
// it to the GPU
|
||||||
if (!picaShaderSource.empty()) {
|
if (!picaShaderSource.empty()) {
|
||||||
std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader);
|
std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader);
|
||||||
|
@ -1167,24 +1168,49 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele
|
||||||
|
|
||||||
gl.bindVAO(hwShaderVAO);
|
gl.bindVAO(hwShaderVAO);
|
||||||
|
|
||||||
|
// Enable or disable vertex attributes as needed
|
||||||
|
const u32 currentAttributeMask = accel->enabledAttributeMask;
|
||||||
|
// Use bitwise xor to calculate which attributes changed
|
||||||
|
u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask;
|
||||||
|
|
||||||
|
while (attributeMaskDiff != 0) {
|
||||||
|
// Get the index of the next differing attribute and clear its bit in the diff mask
|
||||||
|
const u32 index = 31 - std::countl_zero<u32>(attributeMaskDiff);
|
||||||
|
const u32 mask = 1u << index;
|
||||||
|
attributeMaskDiff ^= mask;
|
||||||
|
|
||||||
|
if ((currentAttributeMask & mask) != 0) {
|
||||||
|
// Attribute was disabled and is now enabled
|
||||||
|
hwShaderVAO.enableAttribute(index);
|
||||||
|
} else {
|
||||||
|
// Attribute was enabled and is now disabled
|
||||||
|
hwShaderVAO.disableAttribute(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
previousAttributeMask = currentAttributeMask;
|
||||||
|
|
||||||
for (int i = 0; i < totalAttribCount; i++) {
|
for (int i = 0; i < totalAttribCount; i++) {
|
||||||
const auto& attrib = accel->attributeInfo[i];
|
const auto& attrib = accel->attributeInfo[i];
|
||||||
|
|
||||||
if (attrib.fixed) {
|
if (attrib.fixed) {
|
||||||
Helpers::panic("Fixed attribute!");
|
if ((currentAttributeMask & (1u << i)) == 0) {
|
||||||
|
glVertexAttrib4f(attrib.inputReg, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (attrib.isPadding) {
|
if (attrib.isPadding) [[unlikely]] {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
glVertexAttribPointer(i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride, reinterpret_cast<GLvoid*>(vertexBufferRes.buffer_offset + attrib.offset));
|
|
||||||
// TODO: Disable unused attributes as well
|
|
||||||
hwShaderVAO.enableAttribute(i);
|
|
||||||
|
|
||||||
const u32 attributeSize = attrib.size * vertexCount;
|
const u32 attributeSize = attrib.size * vertexCount;
|
||||||
std::memcpy(vertexData, attrib.data, attributeSize);
|
std::memcpy(vertexData, attrib.data, attributeSize);
|
||||||
|
|
||||||
vertexData += attributeSize;
|
vertexData += attributeSize;
|
||||||
|
|
||||||
|
glVertexAttribPointer(
|
||||||
|
attrib.inputReg, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride,
|
||||||
|
reinterpret_cast<GLvoid*>(vertexBufferRes.buffer_offset + attrib.offset)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue