mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 06:05:40 +12:00
170 lines
6.6 KiB
C++
170 lines
6.6 KiB
C++
#pragma once
|
|
#include <array>
#include <cstring>
#include <memory>
#include <string>

#include "PICA/dynapica/shader_rec.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_vertex.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_unit.hpp"
#include "config.hpp"
#include "helpers.hpp"
#include "logger.hpp"
#include "memory.hpp"
#include "renderer.hpp"
|
|
|
|
class GPU {
|
|
static constexpr u32 regNum = 0x300;
|
|
static constexpr u32 extRegNum = 0x1000;
|
|
|
|
using vec4f = std::array<Floats::f24, 4>;
|
|
using Registers = std::array<u32, regNum>; // Internal registers (named registers in short since they're the main ones)
|
|
using ExternalRegisters = std::array<u32, extRegNum>;
|
|
|
|
Memory& mem;
|
|
EmulatorConfig& config;
|
|
ShaderUnit shaderUnit;
|
|
ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported
|
|
|
|
u8* vram = nullptr;
|
|
MAKE_LOG_FUNCTION(log, gpuLogger)
|
|
|
|
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
|
|
static constexpr u32 vramSize = u32(6_MB);
|
|
Registers regs; // GPU internal registers
|
|
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
|
|
|
|
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
|
std::array<PICA::Vertex, 3> immediateModeVertices;
|
|
uint immediateModeVertIndex;
|
|
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
|
|
|
template <bool indexed, bool useShaderJIT>
|
|
void drawArrays();
|
|
|
|
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
|
void drawArrays(bool indexed);
|
|
|
|
struct AttribInfo {
|
|
u32 offset = 0; // Offset from base vertex array
|
|
int size = 0; // Bytes per vertex
|
|
u32 config1 = 0;
|
|
u32 config2 = 0;
|
|
u32 componentCount = 0; // Number of components for the attribute
|
|
|
|
u64 getConfigFull() { return u64(config1) | (u64(config2) << 32); }
|
|
};
|
|
|
|
u64 getVertexShaderInputConfig() {
|
|
return u64(regs[PICA::InternalRegs::VertexShaderInputCfgLow]) | (u64(regs[PICA::InternalRegs::VertexShaderInputCfgHigh]) << 32);
|
|
}
|
|
|
|
std::array<AttribInfo, maxAttribCount> attributeInfo; // Info for each of the 12 attributes
|
|
u32 totalAttribCount = 0; // Number of vertex attributes to send to VS
|
|
u32 fixedAttribMask = 0; // Which attributes are fixed?
|
|
|
|
u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range)
|
|
u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted
|
|
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
|
|
|
// Command processor pointers for GPU command lists
|
|
u32* cmdBuffStart = nullptr;
|
|
u32* cmdBuffEnd = nullptr;
|
|
u32* cmdBuffCurr = nullptr;
|
|
|
|
std::unique_ptr<Renderer> renderer;
|
|
PICA::Vertex getImmediateModeVertex();
|
|
|
|
public:
|
|
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
|
|
// Encoded in PICA native format
|
|
static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256;
|
|
std::array<uint32_t, LightingLutSize> lightingLUT;
|
|
|
|
// Used to prevent uploading the lighting_lut on every draw call
|
|
// Set to true when the CPU writes to the lighting_lut
|
|
// Set to false by the renderer when the lighting_lut is uploaded ot the GPU
|
|
bool lightingLUTDirty = false;
|
|
|
|
GPU(Memory& mem, EmulatorConfig& config);
|
|
void display() { renderer->display(); }
|
|
void screenshot(const std::string& name) { renderer->screenshot(name); }
|
|
void deinitGraphicsContext() { renderer->deinitGraphicsContext(); }
|
|
|
|
#if defined(PANDA3DS_FRONTEND_SDL)
|
|
void initGraphicsContext(SDL_Window* window) { renderer->initGraphicsContext(window); }
|
|
#elif defined(PANDA3DS_FRONTEND_QT)
|
|
void initGraphicsContext(GL::Context* context) { renderer->initGraphicsContext(context); }
|
|
#endif
|
|
|
|
void fireDMA(u32 dest, u32 source, u32 size);
|
|
void reset();
|
|
|
|
Registers& getRegisters() { return regs; }
|
|
ExternalRegisters& getExtRegisters() { return externalRegs; }
|
|
void startCommandList(u32 addr, u32 size);
|
|
|
|
// Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked
|
|
u32 readReg(u32 address);
|
|
void writeReg(u32 address, u32 value);
|
|
|
|
u32 readExternalReg(u32 index);
|
|
void writeExternalReg(u32 index, u32 value);
|
|
|
|
// Used when processing GPU command lists
|
|
u32 readInternalReg(u32 index);
|
|
void writeInternalReg(u32 index, u32 value, u32 mask);
|
|
|
|
// Used for setting the size of the window we'll be outputting graphics to
|
|
void setOutputSize(u32 width, u32 height) { renderer->setOutputSize(width, height); }
|
|
|
|
// TODO: Emulate the transfer engine & its registers
|
|
// Then this can be emulated by just writing the appropriate values there
|
|
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) { renderer->clearBuffer(startAddress, endAddress, value, control); }
|
|
|
|
// TODO: Emulate the transfer engine & its registers
|
|
// Then this can be emulated by just writing the appropriate values there
|
|
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
|
renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags);
|
|
}
|
|
|
|
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
|
|
renderer->textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags);
|
|
}
|
|
|
|
// Read a value of type T from physical address paddr
|
|
// This is necessary because vertex attribute fetching uses physical addresses
|
|
template <typename T>
|
|
T readPhysical(u32 paddr) {
|
|
if (paddr >= PhysicalAddrs::FCRAM && paddr <= PhysicalAddrs::FCRAMEnd) {
|
|
u8* fcram = mem.getFCRAM();
|
|
u32 index = paddr - PhysicalAddrs::FCRAM;
|
|
|
|
return *(T*)&fcram[index];
|
|
} else {
|
|
Helpers::panic("[PICA] Read unimplemented paddr %08X", paddr);
|
|
}
|
|
}
|
|
|
|
// Get a pointer of type T* to the data starting from physical address paddr
|
|
template <typename T>
|
|
T* getPointerPhys(u32 paddr, u32 size = 0) {
|
|
if (paddr >= PhysicalAddrs::FCRAM && paddr + size <= PhysicalAddrs::FCRAMEnd) {
|
|
u8* fcram = mem.getFCRAM();
|
|
u32 index = paddr - PhysicalAddrs::FCRAM;
|
|
|
|
return (T*)&fcram[index];
|
|
} else if (paddr >= PhysicalAddrs::VRAM && paddr + size <= PhysicalAddrs::VRAMEnd) {
|
|
u32 index = paddr - PhysicalAddrs::VRAM;
|
|
return (T*)&vram[index];
|
|
} else [[unlikely]] {
|
|
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
|
}
|
|
}
|
|
|
|
Renderer* getRenderer() { return renderer.get(); }
|
|
private:
|
|
// GPU external registers
|
|
// We have them in the end of the struct for cache locality reasons. Tl;dr we want the more commonly used things to be packed in the start
|
|
// Of the struct, instead of externalRegs being in the middle
|
|
ExternalRegisters externalRegs;
|
|
};
|