mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 14:15:41 +12:00
Rewriting hw vertex fetch
This commit is contained in:
parent
15b6a9e2d9
commit
4a39b06262
5 changed files with 107 additions and 95 deletions
|
@ -6,32 +6,37 @@
|
|||
|
||||
namespace PICA {
|
||||
struct DrawAcceleration {
|
||||
static constexpr u32 maxAttribCount = 12;
|
||||
static constexpr u32 maxAttribCount = 16;
|
||||
static constexpr u32 maxLoaderCount = 12;
|
||||
|
||||
struct AttributeInfo {
|
||||
u8* data;
|
||||
u32 offset;
|
||||
u32 size;
|
||||
u32 stride;
|
||||
|
||||
u8 inputReg; // Which input reg should this attribute go to in the vertex shader?
|
||||
u8 type;
|
||||
u8 componentCount;
|
||||
bool fixed;
|
||||
bool isPadding;
|
||||
|
||||
std::array<float, 4> fixedValue; // For fixed attributes
|
||||
};
|
||||
|
||||
struct Loader {
|
||||
// Data to upload for this loader
|
||||
u8* data;
|
||||
usize size;
|
||||
};
|
||||
|
||||
u8* indexBuffer;
|
||||
|
||||
// Minimum and maximum index in the index buffer for a draw call
|
||||
u16 minimumIndex, maximumIndex;
|
||||
u32 totalAttribCount;
|
||||
u32 totalLoaderCount;
|
||||
u32 enabledAttributeMask;
|
||||
u32 fixedAttributes;
|
||||
u32 vertexDataSize;
|
||||
|
||||
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
||||
std::array<Loader, maxLoaderCount> loaders;
|
||||
|
||||
bool canBeAccelerated;
|
||||
bool indexed;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include "PICA/draw_acceleration.hpp"
|
||||
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
|
@ -53,88 +54,94 @@ void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
|
|||
const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
|
||||
const u64 inputAttrCfg = getVertexShaderInputConfig();
|
||||
|
||||
u32 buffer = 0;
|
||||
u32 attrCount = 0;
|
||||
u32 loaderOffset = 0;
|
||||
accel.vertexDataSize = 0;
|
||||
accel.totalLoaderCount = 0;
|
||||
|
||||
while (attrCount < totalAttribCount) {
|
||||
bool fixedAttrib = (fixedAttribMask & (1 << attrCount)) != 0;
|
||||
for (int i = 0; i < PICA::DrawAcceleration::maxLoaderCount; i++) {
|
||||
auto& loaderData = attributeInfo[i]; // Get information for this attribute loader
|
||||
|
||||
// Variable (non-fixed) attribute
|
||||
if (!fixedAttrib) {
|
||||
auto& attrData = attributeInfo[buffer]; // Get information for this attribute
|
||||
u64 attrCfg = attrData.getConfigFull(); // Get config1 | (config2 << 32)
|
||||
// This loader is empty, skip it
|
||||
if (loaderData.componentCount == 0 || loaderData.size == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attrData.componentCount != 0) {
|
||||
// Size of the attribute in bytes multiplied by the total number of vertices
|
||||
const u32 bytes = attrData.size * vertexCount;
|
||||
// Add it to the total vertex data size, aligned to 4 bytes.
|
||||
accel.vertexDataSize += (bytes + 3) & ~3;
|
||||
auto& loader = accel.loaders[accel.totalLoaderCount++];
|
||||
|
||||
// The size of the loader in bytes is equal to the bytes supplied for 1 vertex, multiplied by the number of vertices we'll be uploading
|
||||
// Which is equal to maximumIndex - minimumIndex + 1
|
||||
const u32 bytes = loaderData.size * (accel.maximumIndex - accel.minimumIndex + 1);
|
||||
loader.size = bytes;
|
||||
|
||||
// Add it to the total vertex data size, aligned to 4 bytes.
|
||||
accel.vertexDataSize += (bytes + 3) & ~3;
|
||||
|
||||
// Get a pointer to the data where this loader's data is stored
|
||||
const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size);
|
||||
loader.data = getPointerPhys<u8>(loaderAddress);
|
||||
|
||||
u64 attrCfg = loaderData.getConfigFull(); // Get config1 | (config2 << 32)
|
||||
u32 attributeOffset = 0;
|
||||
|
||||
for (int component = 0; component < loaderData.componentCount; component++) {
|
||||
uint attributeIndex = (attrCfg >> (component * 4)) & 0xf; // Get index of attribute in vertexCfg
|
||||
|
||||
// Vertex attributes used as padding
|
||||
// 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively
|
||||
if (attributeIndex >= 12) [[unlikely]] {
|
||||
Helpers::panic("Padding attribute");
|
||||
// Align attribute address up to a 4 byte boundary
|
||||
attributeOffset = (attributeOffset + 3) & -4;
|
||||
attributeOffset += (attributeIndex - 11) << 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 attributeOffset = 0;
|
||||
for (int i = 0; i < attrData.componentCount; i++) {
|
||||
uint index = (attrCfg >> (i * 4)) & 0xf; // Get index of attribute in vertexCfg
|
||||
auto& attr = accel.attributeInfo[attrCount];
|
||||
attr.fixed = false;
|
||||
const u32 attribInfo = (vertexCfg >> (attributeIndex * 4)) & 0xf;
|
||||
const u32 attribType = attribInfo & 0x3; // Type of attribute (sbyte/ubyte/short/float)
|
||||
const u32 size = (attribInfo >> 2) + 1; // Total number of components
|
||||
|
||||
// Vertex attributes used as padding
|
||||
// 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively
|
||||
if (index >= 12) [[unlikely]] {
|
||||
Helpers::panic("Padding attribute");
|
||||
// Align attribute address up to a 4 byte boundary
|
||||
attributeOffset = (attributeOffset + 3) & -4;
|
||||
attributeOffset += (index - 11) << 2;
|
||||
// Size of each component based on the attribute type
|
||||
static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4};
|
||||
const u32 inputReg = (inputAttrCfg >> (attributeIndex * 4)) & 0xf;
|
||||
// Mark the attribute as enabled
|
||||
accel.enabledAttributeMask |= 1 << inputReg;
|
||||
|
||||
attr.data = nullptr;
|
||||
attr.isPadding = true;
|
||||
continue;
|
||||
}
|
||||
auto& attr = accel.attributeInfo[inputReg];
|
||||
attr.componentCount = size;
|
||||
attr.offset = attributeOffset + loaderOffset;
|
||||
attr.stride = loaderData.size;
|
||||
attr.type = attribType;
|
||||
attributeOffset += size * sizePerComponent[attribType];
|
||||
}
|
||||
|
||||
const u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf;
|
||||
const u32 attribType = attribInfo & 0x3; // Type of attribute (sbyte/ubyte/short/float)
|
||||
const u32 size = (attribInfo >> 2) + 1; // Total number of components
|
||||
|
||||
// Size of each component based on the attribute type
|
||||
static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4};
|
||||
const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf;
|
||||
// Mark the attribute as enabled
|
||||
accel.enabledAttributeMask |= 1 << inputReg;
|
||||
loaderOffset += loader.size;
|
||||
}
|
||||
|
||||
// Get a pointer to the data where this attribute is stored
|
||||
const u32 attrAddress = vertexBase + attributeOffset + attrData.offset + (accel.minimumIndex * attrData.size);
|
||||
u32 fixedAttributes = fixedAttribMask;
|
||||
accel.fixedAttributes = 0;
|
||||
|
||||
attr.data = getPointerPhys<u8>(attrAddress);
|
||||
attr.inputReg = inputReg;
|
||||
attr.componentCount = size;
|
||||
attr.offset = attributeOffset;
|
||||
attr.size = size * sizePerComponent[attribType];
|
||||
attr.stride = attrData.size;
|
||||
attr.type = attribType;
|
||||
attr.isPadding = false;
|
||||
attributeOffset += attr.size;
|
||||
// Fetch values for all fixed attributes, using count-trailing-zeros (CTZ) on the fixed attribute mask to find the attributes that are actually fixed
|
||||
while (fixedAttributes != 0) {
|
||||
// Get index of next fixed attribute and turn it off
|
||||
const u32 index = std::countr_zero<u32>(fixedAttributes);
|
||||
const u32 mask = 1u << index;
|
||||
fixedAttributes ^= mask;
|
||||
|
||||
attrCount += 1;
|
||||
}
|
||||
// PICA register this fixed attribute is meant to go to
|
||||
const u32 inputReg = (inputAttrCfg >> (index * 4)) & 0xf;
|
||||
const u32 inputRegMask = 1u << inputReg;
|
||||
|
||||
buffer += 1;
|
||||
} else {
|
||||
vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount];
|
||||
auto& attr = accel.attributeInfo[attrCount];
|
||||
// If this input reg is already used for a non-fixed attribute then it will not be replaced by a fixed attribute
|
||||
if ((accel.enabledAttributeMask & inputRegMask) == 0) {
|
||||
vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[index];
|
||||
auto& attr = accel.attributeInfo[inputReg];
|
||||
|
||||
attr.fixed = true;
|
||||
// Set the data pointer to nullptr in order to catch any potential bugs
|
||||
attr.data = nullptr;
|
||||
attr.isPadding = false;
|
||||
accel.fixedAttributes |= inputRegMask;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
attr.fixedValue[i] = fixedAttr[i].toFloat32();
|
||||
}
|
||||
|
||||
const u32 inputReg = (inputAttrCfg >> (attrCount * 4)) & 0xf;
|
||||
|
||||
attr.inputReg = inputReg;
|
||||
attrCount += 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -337,8 +337,6 @@ void GPU::drawArrays() {
|
|||
}
|
||||
|
||||
// Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0 for everything else)
|
||||
// Corgi does this although I'm not sure if it's actually needed for anything.
|
||||
// TODO: Find out
|
||||
while (component < 4) {
|
||||
attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0);
|
||||
component++;
|
||||
|
|
|
@ -508,7 +508,7 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
|
||||
} else {
|
||||
if (performIndexedRender) {
|
||||
// When doing indexed rendering, bind the EBO and use glDrawRangeElementsBaseVertex to issue the indexed draw
|
||||
// When doing indexed rendering, use glDrawRangeElementsBaseVertex to issue the indexed draw
|
||||
hwIndexBuffer->Bind();
|
||||
glDrawRangeElementsBaseVertex(
|
||||
primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
|
||||
|
@ -1165,12 +1165,13 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele
|
|||
hwVertexBuffer->Bind();
|
||||
auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize);
|
||||
u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer);
|
||||
const u32 vertexBufferOffset = vertexBufferRes.buffer_offset;
|
||||
|
||||
gl.bindVAO(hwShaderVAO);
|
||||
|
||||
// Enable or disable vertex attributes as needed
|
||||
const u32 currentAttributeMask = accel->enabledAttributeMask;
|
||||
// Use bitwise xor to calculate which attributes chanced
|
||||
// Use bitwise xor to calculate which attributes changed
|
||||
u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask;
|
||||
|
||||
while (attributeMaskDiff != 0) {
|
||||
|
@ -1190,29 +1191,30 @@ void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAccele
|
|||
|
||||
previousAttributeMask = currentAttributeMask;
|
||||
|
||||
for (int i = 0; i < totalAttribCount; i++) {
|
||||
const auto& attrib = accel->attributeInfo[i];
|
||||
// Upload the data for each (enabled) attribute loader into our vertex buffer
|
||||
for (int i = 0; i < accel->totalLoaderCount; i++) {
|
||||
auto& loader = accel->loaders[i];
|
||||
|
||||
if (attrib.fixed) {
|
||||
if ((currentAttributeMask & (1u << i)) == 0) {
|
||||
glVertexAttrib4f(attrib.inputReg, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]);
|
||||
}
|
||||
} else {
|
||||
if (attrib.isPadding) [[unlikely]] {
|
||||
continue;
|
||||
}
|
||||
|
||||
const u32 attributeSize = attrib.size * vertexCount;
|
||||
std::memcpy(vertexData, attrib.data, attributeSize);
|
||||
|
||||
vertexData += attributeSize;
|
||||
|
||||
glVertexAttribPointer(
|
||||
attrib.inputReg, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride,
|
||||
reinterpret_cast<GLvoid*>(vertexBufferRes.buffer_offset + attrib.offset)
|
||||
);
|
||||
}
|
||||
std::memcpy(vertexData, loader.data, loader.size);
|
||||
vertexData += loader.size;
|
||||
}
|
||||
|
||||
hwVertexBuffer->Unmap(accel->vertexDataSize);
|
||||
|
||||
// Iterate over the 16 PICA input registers and configure how they should be fetched.
|
||||
for (int i = 0; i < 16; i++) {
|
||||
const auto& attrib = accel->attributeInfo[i];
|
||||
const u32 attributeMask = 1u << i;
|
||||
|
||||
if (accel->fixedAttributes & attributeMask) {
|
||||
// This is a fixed attribute, so set its fixed value
|
||||
// TODO: Don't update these if the value does not change, it generates way too many calls
|
||||
glVertexAttrib4f(i, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]);
|
||||
} else if (accel->enabledAttributeMask & attributeMask) {
|
||||
glVertexAttribPointer(
|
||||
i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride,
|
||||
reinterpret_cast<GLvoid*>(vertexBufferOffset + attrib.offset)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
2
third_party/duckstation/gl/stream_buffer.cpp
vendored
2
third_party/duckstation/gl/stream_buffer.cpp
vendored
|
@ -149,7 +149,7 @@ namespace {
|
|||
const u32 end = std::min<u32>(GetSyncIndexForOffset(offset) + 1, NUM_SYNC_POINTS);
|
||||
for (; m_available_block_index < end; m_available_block_index++) {
|
||||
if (!m_sync_objects[m_used_block_index]) [[unlikely]] {
|
||||
Helpers::warn("GL stream buffer: Fence slot we're trying to wait on in not in use");
|
||||
Helpers::warn("GL stream buffer: Fence slot we're trying to wait on is not in use");
|
||||
}
|
||||
|
||||
WaitForSync(m_sync_objects[m_available_block_index]);
|
||||
|
|
Loading…
Add table
Reference in a new issue