diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp
index 9b2129f1..56898455 100644
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@@ -6,14 +6,41 @@
 
 class GPU {
 	Memory& mem;
-	static constexpr u32 regNum = 0x300;
 	ShaderUnit shaderUnit;
-	std::array<u32, regNum> regs; // GPU internal registers
+	u8* vram = nullptr; // Allocated in the GPU constructor, zeroed by reset()
 
+	static constexpr u32 totalAttribCount = 12; // Up to 12 vertex attributes
+	static constexpr u32 regNum = 0x300;
+	static constexpr u32 vramSize = 6_MB;
+	std::array<u32, regNum> regs; // GPU internal registers
+
+	template <typename T>
+	T readPhysical(u32 paddr) { // Reads a T at physical address paddr; only the FCRAM range is handled, anything else panics
+		if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
+			u8* fcram = mem.getFCRAM();
+			u32 index = paddr - PhysicalAddrs::FCRAM; // Byte offset into the FCRAM buffer
+
+			return *(T*)&fcram[index];
+		} else {
+			Helpers::panic("[PICA] Read unimplemented paddr %08X", paddr);
+		}
+	}
+
+	template <bool indexed> void drawArrays();
+	// Silly method of avoiding linking problems. TODO: Change to something less silly
+	void drawArrays(bool indexed);
+
+	struct AttribInfo {
+		u32 offset = 0; // Offset from base vertex array
+		int size = 0; // Bytes per vertex
+	};
+
+	std::array<AttribInfo, totalAttribCount> attributeInfo;
+
 public:
-	GPU(Memory& mem) : mem(mem) {}
+	GPU(Memory& mem);
 	void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control);
 	void reset();
 
diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp
index afcaa5e5..4d4e5447 100644
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@@ -2,10 +2,42 @@
 
 namespace PICAInternalRegs {
 	enum : u32 {
-		// Draw command regs
+		// Geometry pipeline regs
+		VertexAttribLoc = 0x200,
+		IndexBufferConfig = 0x227,
 		VertexCountReg = 0x228,
 		VertexOffsetReg = 0x22A,
 		SignalDrawArrays = 0x22E,
+		SignalDrawElements = 0x22F,
+
+		Attrib0Offset = 0x203,
+		Attrib1Offset = 0x206,
+		Attrib2Offset = 0x209,
+		Attrib3Offset = 0x20C,
+		Attrib4Offset = 0x20F,
+		Attrib5Offset = 0x212,
+		Attrib6Offset = 0x215,
+		Attrib7Offset = 0x218,
+		Attrib8Offset = 0x21B,
+		Attrib9Offset = 0x21E,
+		Attrib10Offset = 0x221,
+		Attrib11Offset = 0x224,
+
+		Attrib0Config2 = 0x205,
+		Attrib1Config2 = 0x208,
+		Attrib2Config2 = 0x20B,
+		Attrib3Config2 = 0x20E,
+		Attrib4Config2 = 0x211,
+		Attrib5Config2 = 0x214,
+		Attrib6Config2 = 0x217,
+		Attrib7Config2 = 0x21A,
+		Attrib8Config2 = 0x21D,
+		Attrib9Config2 = 0x220,
+		Attrib10Config2 = 0x223,
+		Attrib11Config2 = 0x226,
+
+		AttribInfoStart = Attrib0Offset, // First register of the 3-registers-per-attribute window
+		AttribInfoEnd = Attrib11Config2, // Last register of that window (inclusive)
 
 		// Vertex shader registers
 		VertexShaderTransferEnd = 0x2BF,
diff --git a/include/memory.hpp b/include/memory.hpp
index 1f66b8ff..5cfe1c63 100644
--- a/include/memory.hpp
+++ b/include/memory.hpp
@@ -7,7 +7,8 @@
 
 namespace PhysicalAddrs {
 	enum : u32 {
-		FCRAM = 0x20000000
+		FCRAM = 0x20000000,
+		FCRAMEnd = FCRAM + 0x07FFFFFF
 	};
 }
 
@@ -24,7 +25,7 @@ namespace VirtualAddrs {
 		StackSize = 0x4000,
 
 		NormalHeapStart = 0x08000000,
-		LinearHeapStartOld = 0x14000000, // If kernel version <
+		LinearHeapStartOld = 0x14000000, // If kernel version < 0x22C
 		LinearHeapStartNew = 0x30000000,
 
 		// Start of TLS for first thread. Next thread's storage will be at TLSBase + 0x1000, and so on
@@ -116,13 +117,14 @@ public:
 	void write64(u32 vaddr, u64 value);
 
 	u32 getLinearHeapVaddr();
+	u8* getFCRAM() { return fcram; }
 
 	// Returns whether "addr" is aligned to a page (4096 byte) boundary
 	static constexpr bool isAligned(u32 addr) {
 		return (addr & pageMask) == 0;
 	}
 
-	// Allocate "size" bytes of RAM starting from physical FCRAM address "paddr" (We pick it ourself if paddr == 0)
+	// Allocate "size" bytes of RAM starting from FCRAM index "paddr" (We pick it ourself if paddr == 0)
 	// And map them to virtual address "vaddr" (We also pick it ourself if vaddr == 0).
 	// If the "linear" flag is on, the paddr pages must be adjacent in FCRAM
 	// r, w, x: Permissions for the allocated memory
diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp
index dd45a282..762e6a4c 100644
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@@ -5,9 +5,14 @@
 
 using namespace Floats;
 
+GPU::GPU(Memory& mem) : mem(mem) {
+	vram = new u8[vramSize]; // NOTE(review): no matching delete[] is visible in this diff
+}
+
 void GPU::reset() {
 	regs.fill(0);
 	shaderUnit.reset();
+	std::memset(vram, 0, vramSize);
 	// TODO: Reset blending, texturing, etc here
 }
 
@@ -15,10 +20,23 @@ void GPU::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control)
 	printf("GPU: Clear buffer\nStart: %08X End: %08X\nValue: %08X Control: %08X\n", startAddress, endAddress, value, control);
 }
 
-void GPU::drawArrays() {
-	const u32 vertexCount = regs[PICAInternalRegs::VertexCountReg];
-	const u32 vertexOffset = regs[PICAInternalRegs::VertexOffsetReg];
+void GPU::drawArrays(bool indexed) { // Runtime-to-compile-time dispatch onto the templated implementation
+	if (indexed)
+		drawArrays<true>();
+	else
+		drawArrays<false>();
+}
 
+template <bool indexed>
+void GPU::drawArrays() {
+	// Base address for vertex attributes
+	// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
+	const u32 vertexBase = ((regs[PICAInternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
+	const u32 vertexCount = regs[PICAInternalRegs::VertexCountReg]; // Total # of vertices to transfer
+	printf("Vertex location: %08X\n", vertexBase);
+
+	//u32* vertexBuffer = static_cast<u32*>(mem.getReadPointer(vertexBase));
+	//if (!vertexBuffer) Helpers::panic("PICA::DrawArrays: Failed to get attribute buffer");
 	u32* attrBuffer = &regs[0x233];
 
 	auto a = f24::fromRaw(attrBuffer[0] >> 8);
@@ -26,6 +44,29 @@ void GPU::drawArrays() {
 	auto g = f24::fromRaw(((attrBuffer[1] & 0xFFFF) << 8) | ((attrBuffer[2] >> 24) & 0xFF));
 	auto r = f24::fromRaw(attrBuffer[2] & 0xFFFFFF);
 
-	printf("PICA::DrawArrays(vertex count = %d, vertexOffset = %d)\n", vertexCount, vertexOffset);
+	if constexpr (!indexed) {
+		u32 offset = regs[PICAInternalRegs::VertexOffsetReg];
+		printf("PICA::DrawArrays(vertex count = %d, vertexOffset = %d)\n", vertexCount, offset);
+	} else {
+		Helpers::panic("[PICA] Indexed drawing");
+	}
+
 	printf("(r: %f, g: %f, b: %f, a: %f)\n", r.toFloat32(), g.toFloat32(), b.toFloat32(), a.toFloat32());
+
+	for (u32 i = 0; i < vertexCount; i++) {
+		u32 vertexIndex; // Index of the vertex in the VBO
+		if constexpr (!indexed) {
+			vertexIndex = i + regs[PICAInternalRegs::VertexOffsetReg];
+		} else {
+			Helpers::panic("[PICA]: Unimplemented indexed rendering");
+		}
+
+		// Get address of attribute 0
+		auto& attr = attributeInfo[0];
+		u32 attr0Addr = vertexBase + attr.offset + (vertexIndex * attr.size);
+
+		u32 attr0 = readPhysical<u32>(attr0Addr);
+		float attr0Float = *(float*)&attr0; // Must be float: storing the reinterpreted value in a u32 would truncate it before printing
+		printf("Attr0: %f\n", (double)attr0Float);
+	}
 }
\ No newline at end of file
diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp
index 649e2584..3b3c312c 100644
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@@ -40,7 +40,11 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 	// We currently use the unmasked value like Citra does
 	switch (index) {
 		case SignalDrawArrays:
-			if (value != 0) drawArrays();
+			if (value != 0) drawArrays(false);
+			break;
+
+		case SignalDrawElements:
+			if (value != 0) drawArrays(true);
 			break;
 
 		case VertexShaderTransferEnd:
@@ -57,7 +61,22 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
 
 		default:
-			printf("GPU: Wrote to unimplemented internal reg: %X, value: %08X\n", index, newValue);
+			// Vertex attribute registers
+			if (index >= AttribInfoStart && index <= AttribInfoEnd) {
+				uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to
+				uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to?
+				auto& attr = attributeInfo[attributeIndex];
+
+				switch (reg) {
+					case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
+					case 1: break; // We don't handle this yet
+					case 2: // We don't handle most of this yet
+						attr.size = (value >> 16) & 0xff;
+						break;
+				}
+			} else {
+				printf("GPU: Wrote to unimplemented internal reg: %X, value: %08X\n", index, newValue);
+			}
 			break;
 	}
 }
\ No newline at end of file
diff --git a/src/core/kernel/kernel.cpp b/src/core/kernel/kernel.cpp
index a3cf16f9..5aed4436 100644
--- a/src/core/kernel/kernel.cpp
+++ b/src/core/kernel/kernel.cpp
@@ -155,7 +155,7 @@ void Kernel::getProcessInfo() {
 	}
 
 	switch (type) {
-		case 20:
+		case 20: // Returns 0x20000000 - <linear heap vaddr>
 			regs[1] = PhysicalAddrs::FCRAM - mem.getLinearHeapVaddr();
 			regs[2] = 0;
 			break;