mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-05-05 19:54:49 +12:00
Merge remote-tracking branch 'origin/GamingProcessingUnit' into dynapica
This commit is contained in:
commit
02d07f29d7
106 changed files with 23630 additions and 11117 deletions
|
@ -1,10 +1,51 @@
|
|||
#include "PICA/gpu.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstddef>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include <cstdio>
|
||||
|
||||
using namespace Floats;
|
||||
|
||||
// A representation of the output vertex as it comes out of the vertex shader, with padding and all
// The same storage is exposed two ways through an anonymous union:
//   - `s`:   named fields, read by the draw code once the vertex has been assembled
//   - `raw`: a flat array of f24 slots, indexed by the per-component output mapping
// The slot index of every named field is locked in by the ASSERT_POS static_asserts that follow this struct.
|
||||
struct OutputVertex {
|
||||
using vec2f = OpenGL::Vector<f24, 2>;
|
||||
using vec3f = OpenGL::Vector<f24, 3>;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
|
||||
union {
|
||||
struct {
|
||||
vec4f positions; // Vertex position (slots 0-3)
|
||||
vec4f quaternion; // Quaternion specifying the normal/tangent frame (for fragment lighting) (slots 4-7)
|
||||
vec4f colour; // Vertex color (slots 8-11)
|
||||
vec2f texcoord0; // Texcoords for texture unit 0 (Only U and V, W is stored separately for 3D textures!) (slots 12-13)
|
||||
vec2f texcoord1; // Texcoords for TU 1 (slots 14-15)
|
||||
f24 texcoord0_w; // W component for texcoord 0 if using a 3D texture (slot 16)
|
||||
u32 padding; // Unused (slot 17; NOTE(review): relies on u32 and f24 having the same size - confirm)
|
||||
|
||||
|
||||
vec3f view; // View vector (for fragment lighting) (slots 18-20)
|
||||
u32 padding2; // Unused (slot 21)
|
||||
vec2f texcoord2; // Texcoords for TU 2 (slots 22-23)
|
||||
} s;
|
||||
|
||||
// The software, non-accelerated vertex loader writes here and then reads specific components from the above struct
// Sized 0x20 so that any index produced by the 5-bit (& 0x1F) output mapping in GPU::drawArrays stays in bounds;
// slots past texcoord2 have no named field in `s`.
|
||||
f24 raw[0x20];
|
||||
};
|
||||
// Empty user-provided constructor: nothing is initialized here, so every slot is indeterminate until written.
// NOTE(review): presumably required because the union members are not trivially default-constructible - confirm.
OutputVertex() {}
|
||||
};
|
||||
// Compile-time layout check for OutputVertex.
// ASSERT_POS(member, pos) fails the build unless OutputVertex::s.member starts exactly `pos` f24-sized slots
// from the beginning of the struct, i.e. unless the named field lines up with raw[pos].
// The macro already ends in ';', so each use below is written without a trailing semicolon.
#define ASSERT_POS(member, pos) static_assert(offsetof(OutputVertex, s.member) == pos * sizeof(f24), "OutputVertex struct is broken!");
|
||||
|
||||
ASSERT_POS(positions, 0)
|
||||
ASSERT_POS(quaternion, 4)
|
||||
ASSERT_POS(colour, 8)
|
||||
ASSERT_POS(texcoord0, 12)
|
||||
ASSERT_POS(texcoord1, 14)
|
||||
ASSERT_POS(texcoord0_w, 16)
|
||||
// Slot 17 is `padding`, hence the jump from 16 to 18
ASSERT_POS(view, 18)
|
||||
// Slot 21 is `padding2`, hence the jump to 22
ASSERT_POS(texcoord2, 22)
|
||||
|
||||
GPU::GPU(Memory& mem) : mem(mem), renderer(*this, regs) {
|
||||
vram = new u8[vramSize];
|
||||
mem.setVRAM(vram); // Give the bus a pointer to our VRAM
|
||||
|
@ -51,36 +92,37 @@ void GPU::drawArrays(bool indexed) {
|
|||
}
|
||||
}
|
||||
|
||||
Vertex* vertices = new Vertex[Renderer::vertexBufferSize];
|
||||
static std::array<Vertex, Renderer::vertexBufferSize> vertices;
|
||||
|
||||
template <bool indexed, bool useShaderJIT>
|
||||
void GPU::drawArrays() {
|
||||
// Base address for vertex attributes
|
||||
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
|
||||
const u32 vertexBase = ((regs[PICAInternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||
const u32 vertexCount = regs[PICAInternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||
|
||||
// Configures the type of primitive and the number of vertex shader outputs
|
||||
const u32 primConfig = regs[PICAInternalRegs::PrimitiveConfig];
|
||||
const u32 primType = Helpers::getBits<8, 2>(primConfig);
|
||||
if (primType != 0 && primType != 1 && primType != 3) Helpers::panic("[PICA] Tried to draw unimplemented shape %d\n", primType);
|
||||
const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig];
|
||||
const PICA::PrimType primType = static_cast<PICA::PrimType>(Helpers::getBits<8, 2>(primConfig));
|
||||
if (primType == PICA::PrimType::TriangleFan) Helpers::panic("[PICA] Tried to draw unimplemented shape %d\n", primType);
|
||||
if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize");
|
||||
|
||||
if ((primType == 0 && vertexCount % 3) || (primType == 1 && vertexCount < 3)) {
|
||||
if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) ||
|
||||
(primType == PICA::PrimType::TriangleStrip && vertexCount < 3)) {
|
||||
Helpers::panic("Invalid vertex count for primitive. Type: %d, vert count: %d\n", primType, vertexCount);
|
||||
}
|
||||
|
||||
// Get the configuration for the index buffer, used only for indexed drawing
|
||||
u32 indexBufferConfig = regs[PICAInternalRegs::IndexBufferConfig];
|
||||
u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig];
|
||||
u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff);
|
||||
bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit
|
||||
|
||||
// Stuff the global attribute config registers in one u64 to make attr parsing easier
|
||||
// TODO: Cache this when the vertex attribute format registers are written to
|
||||
u64 vertexCfg = u64(regs[PICAInternalRegs::AttribFormatLow]) | (u64(regs[PICAInternalRegs::AttribFormatHigh]) << 32);
|
||||
u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
|
||||
|
||||
if constexpr (!indexed) {
|
||||
u32 offset = regs[PICAInternalRegs::VertexOffsetReg];
|
||||
u32 offset = regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
log("PICA::DrawArrays(vertex count = %d, vertexOffset = %d)\n", vertexCount, offset);
|
||||
} else {
|
||||
log("PICA::DrawElements(vertex count = %d, index buffer config = %08X)\n", vertexCount, indexBufferConfig);
|
||||
|
@ -91,14 +133,14 @@ void GPU::drawArrays() {
|
|||
}
|
||||
|
||||
// Total number of input attributes to shader. Differs between GS and VS. Currently stubbed to the VS one, as we don't have geometry shaders.
|
||||
const u32 inputAttrCount = (regs[PICAInternalRegs::VertexShaderInputBufferCfg] & 0xf) + 1;
|
||||
const u32 inputAttrCount = (regs[PICA::InternalRegs::VertexShaderInputBufferCfg] & 0xf) + 1;
|
||||
const u64 inputAttrCfg = getVertexShaderInputConfig();
|
||||
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u32 vertexIndex; // Index of the vertex in the VBO
|
||||
|
||||
if constexpr (!indexed) {
|
||||
vertexIndex = i + regs[PICAInternalRegs::VertexOffsetReg];
|
||||
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
} else {
|
||||
if (shortIndex) {
|
||||
auto ptr = getPointerPhys<u16>(indexBufferPointer);
|
||||
|
@ -204,32 +246,42 @@ void GPU::drawArrays() {
|
|||
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
|
||||
}
|
||||
|
||||
if constexpr (useShaderJIT) {
|
||||
if constexpr (useShaderJIT) {
|
||||
shaderJIT.run(shaderUnit.vs);
|
||||
} else {
|
||||
shaderUnit.vs.run();
|
||||
}
|
||||
|
||||
std::memcpy(&vertices[i].position, &shaderUnit.vs.outputs[0], sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].colour, &shaderUnit.vs.outputs[1], sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].UVs, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
OutputVertex out;
|
||||
// Map shader outputs to fixed function properties
|
||||
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||
for (int i = 0; i < totalShaderOutputs; i++) {
|
||||
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
|
||||
|
||||
for (int j = 0; j < 4; j++) { // pls unroll
|
||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||
out.raw[mapping] = shaderUnit.vs.outputs[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
std::memcpy(&vertices[i].position, &out.s.positions, sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].colour, &out.s.colour, sizeof(vec4f));
|
||||
std::memcpy(&vertices[i].texcoord0, &out.s.texcoord0, 2 * sizeof(f24));
|
||||
std::memcpy(&vertices[i].texcoord1, &out.s.texcoord1, 2 * sizeof(f24));
|
||||
std::memcpy(&vertices[i].texcoord0_w, &out.s.texcoord0_w, sizeof(f24));
|
||||
std::memcpy(&vertices[i].texcoord2, &out.s.texcoord2, 2 * sizeof(f24));
|
||||
|
||||
//printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)vertices[i].position.x(), (double)vertices[i].position.y(), (double)vertices[i].position.z(), (double)vertices[i].position.w());
|
||||
//printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)vertices[i].colour.r(), (double)vertices[i].colour.g(), (double)vertices[i].colour.b(), (double)vertices[i].colour.a());
|
||||
//printf("(u, v ) = (%f, %f)\n", vertices[i].UVs.u(), vertices[i].UVs.v());
|
||||
}
|
||||
|
||||
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
|
||||
static constexpr std::array<OpenGL::Primitives, 4> primTypes = {
|
||||
OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle
|
||||
};
|
||||
const auto shape = primTypes[primType];
|
||||
renderer.drawVertices(shape, vertices, vertexCount);
|
||||
renderer.drawVertices(primType, std::span(vertices).first(vertexCount));
|
||||
}
|
||||
|
||||
Vertex GPU::getImmediateModeVertex() {
|
||||
Vertex v;
|
||||
const int totalAttrCount = (regs[PICAInternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
const int totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
|
||||
// Copy immediate mode attributes to vertex shader unit
|
||||
for (int i = 0; i < totalAttrCount; i++) {
|
||||
|
@ -240,11 +292,11 @@ Vertex GPU::getImmediateModeVertex() {
|
|||
shaderUnit.vs.run();
|
||||
std::memcpy(&v.position, &shaderUnit.vs.outputs[0], sizeof(vec4f));
|
||||
std::memcpy(&v.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f));
|
||||
std::memcpy(&v.UVs, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
std::memcpy(&v.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
|
||||
printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.position.x(), (double)v.position.y(), (double)v.position.z(), (double)v.position.w());
|
||||
printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.colour.r(), (double)v.colour.g(), (double)v.colour.b(), (double)v.colour.a());
|
||||
printf("(u, v ) = (%f, %f)\n", v.UVs.u(), v.UVs.v());
|
||||
printf("(u, v ) = (%f, %f)\n", v.texcoord0.u(), v.texcoord0.v());
|
||||
|
||||
return v;
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ u32 GPU::readInternalReg(u32 index) {
|
|||
}
|
||||
|
||||
void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||
using namespace PICAInternalRegs;
|
||||
using namespace PICA::InternalRegs;
|
||||
|
||||
if (index > regNum) {
|
||||
Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value);
|
||||
|
@ -68,7 +68,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
|
||||
case ColourBufferFormat: {
|
||||
u32 format = getBits<16, 3>(value);
|
||||
renderer.setColourFormat(format);
|
||||
renderer.setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -79,8 +79,8 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
|
||||
case DepthBufferFormat: {
|
||||
u32 fmt = value & 0x3;
|
||||
renderer.setDepthFormat(fmt);
|
||||
u32 format = value & 0x3;
|
||||
renderer.setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -137,7 +137,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
if (fixedAttribIndex < 12) [[likely]] {
|
||||
shaderUnit.vs.fixedAttributes[fixedAttribIndex++] = attr;
|
||||
} else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex
|
||||
const uint totalAttrCount = (regs[PICAInternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
const uint totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
if (totalAttrCount <= immediateModeAttrIndex) {
|
||||
printf("Broken state in the immediate mode vertex submission pipeline. Failing silently\n");
|
||||
immediateModeAttrIndex = 0;
|
||||
|
@ -151,13 +151,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
immediateModeVertices[immediateModeVertIndex++] = v;
|
||||
|
||||
// Get primitive type
|
||||
const u32 primConfig = regs[PICAInternalRegs::PrimitiveConfig];
|
||||
const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig];
|
||||
const u32 primType = getBits<8, 2>(primConfig);
|
||||
|
||||
// If we've reached 3 verts, issue a draw call
|
||||
// Handle rendering depending on the primitive type
|
||||
if (immediateModeVertIndex == 3) {
|
||||
renderer.drawVertices(OpenGL::Triangle, &immediateModeVertices[0], 3);
|
||||
renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||
|
||||
switch (primType) {
|
||||
// Triangle or geometry primitive. Draw a triangle and discard all vertices
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue