code: Better screen support

This commit is contained in:
GPUCode 2023-07-10 23:59:44 +03:00
parent d28796fd3f
commit f75a23b5a9
15 changed files with 305 additions and 66 deletions

View file

@ -16,6 +16,12 @@
#include "renderer_vk/renderer_vk.hpp"
#endif
constexpr u32 top_screen_width = 240;
constexpr u32 top_screen_height = 400;
constexpr u32 bottom_screen_width = 240;
constexpr u32 bottom_screen_height = 300;
using namespace Floats;
// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it
@ -78,6 +84,27 @@ void GPU::reset() {
e.config2 = 0;
}
// Initialize the framebuffer registers. Values taken from Citra.
using namespace PICA::ExternalRegs;
// Top screen addresses and dimentions.
external_regs[Framebuffer0AFirstAddr] = 0x181E6000;
external_regs[Framebuffer0ASecondAddr] = 0x1822C800;
external_regs[Framebuffer0BFirstAddr] = 0x18273000;
external_regs[Framebuffer0BSecondAddr] = 0x182B9800;
external_regs[Framebuffer0Size] = (top_screen_height << 16) | top_screen_width;
external_regs[Framebuffer0Stride] = 720;
external_regs[Framebuffer0Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
external_regs[Framebuffer0Select] = 0;
// Bottom screen addresses and dimentions.
external_regs[Framebuffer1AFirstAddr] = 0x1848F000;
external_regs[Framebuffer1ASecondAddr] = 0x184C7800;
external_regs[Framebuffer1Size] = (bottom_screen_height << 16) | bottom_screen_width;
external_regs[Framebuffer1Stride] = 720;
external_regs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
external_regs[Framebuffer1Select] = 0;
renderer->reset();
}

View file

@ -19,11 +19,36 @@ void GPU::writeReg(u32 address, u32 value) {
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
const u32 index = (address - 0x1EF01000) / sizeof(u32);
writeInternalReg(index, value, 0xffffffff);
} else if (address >= 0x1EF00004 && address < 0x1EF01000) {
const u32 index = (address - 0x1EF00004) / sizeof(u32);
writeExternalReg(index, value);
} else {
log("Ignoring write to external GPU register %08X. Value: %08X\n", address, value);
log("Ignoring write to unknown GPU register %08X. Value: %08X\n", address, value);
}
}
u32 GPU::readExternalReg(u32 index) {
using namespace PICA::ExternalRegs;
if (index > 0x1000) [[unlikely]] {
Helpers::panic("Tried to read invalid external GPU register. Index: %X\n", index);
return -1;
}
return external_regs[index];
}
void GPU::writeExternalReg(u32 index, u32 value) {
using namespace PICA::ExternalRegs;
if (index > 0x1000) [[unlikely]] {
Helpers::panic("Tried to write to invalid external GPU register. Index: %X, value: %08X\n", index, value);
return;
}
external_regs[index] = value;
}
u32 GPU::readInternalReg(u32 index) {
using namespace PICA::InternalRegs;
@ -162,7 +187,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
}
break;
// Restart immediate mode primitive drawing
// Restart immediate mode primitive drawing
case PrimitiveRestart:
if (value & 1) {
immediateModeAttrIndex = 0;
@ -384,4 +409,4 @@ void GPU::startCommandList(u32 addr, u32 size) {
writeInternalReg(id, param, mask);
}
}
}
}

View file

@ -450,6 +450,37 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
void RendererGL::display() {
gl.disableScissor();
gl.disableBlend();
gl.disableDepth();
gl.disableScissor();
gl.setColourMask(true, true, true, true);
gl.useProgram(displayProgram);
gl.bindVAO(dummyVAO);
OpenGL::disableClipPlane(0);
OpenGL::disableClipPlane(1);
using namespace PICA::ExternalRegs;
const u32 topScreenAddr = gpu.readExternalReg(Framebuffer0AFirstAddr);
const u32 bottomScreenAddr = gpu.readExternalReg(Framebuffer1AFirstAddr);
auto topScreen = colourBufferCache.findFromAddress(topScreenAddr);
auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr);
Helpers::warn("Top screen addr %08X\n", topScreenAddr);
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
if (topScreen) {
topScreen->get().texture.bind();
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
}
if (bottomScreen) {
bottomScreen->get().texture.bind();
OpenGL::setViewport(40, 0, 320, 240);
OpenGL::draw(OpenGL::TriangleStrip, 4);
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
screenFramebuffer.bind(OpenGL::ReadFramebuffer);
@ -550,42 +581,56 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
}
}
// NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format
PICA::ColorFmt ToColorFmt(u32 format) {
switch (format) {
case 2: return PICA::ColorFmt::RGB565;
case 3: return PICA::ColorFmt::RGBA5551;
default: return static_cast<PICA::ColorFmt>(format);
}
}
void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
const u32 inputWidth = inputSize & 0xffff;
const u32 inputGap = inputSize >> 16;
const u32 inputHeight = inputSize >> 16;
const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags));
const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags));
const PICA::Scaling scaling = static_cast<PICA::Scaling>(Helpers::getBits<24, 2>(flags));
const u32 outputWidth = outputSize & 0xffff;
const u32 outputGap = outputSize >> 16;
auto framebuffer = colourBufferCache.findFromAddress(inputAddr);
// If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0
// Displays are hard I really don't want to try implementing them because getting a fast solution is terrible
OpenGL::Texture& tex = framebuffer.has_value() ? framebuffer.value().get().texture : colourBufferCache[0].texture;
tex.bind();
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
gl.disableBlend();
gl.disableLogicOp();
gl.disableDepth();
gl.disableScissor();
gl.disableStencil();
gl.setColourMask(true, true, true, true);
gl.useProgram(displayProgram);
gl.bindVAO(dummyVAO);
gl.disableClipPlane(0);
gl.disableClipPlane(1);
// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
// We consider output gap == 320 to mean bottom, and anything else to mean top
if (outputGap == 320) {
OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport
} else {
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
u32 outputWidth = outputSize & 0xffff;
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) {
outputWidth >>= 1;
}
u32 outputHeight = outputSize >> 16;
if (scaling == PICA::Scaling::XY) {
outputHeight >>= 1;
}
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
// If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0
// Displays are hard I really don't want to try implementing them because getting a fast solution is terrible
auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight);
auto dstFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight);
Helpers::warn("Display transfer with outputAddr %08X\n", outputAddr);
// Blit the framebuffers
srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer);
dstFramebuffer.fbo.bind(OpenGL::DrawFramebuffer);
glBlitFramebuffer(0, 0, inputWidth, inputHeight, 0, 0, outputWidth, outputHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR);
}
ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) {
// Try to find an already existing buffer that contains the provided address
// This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to
// subrect of a surface and in case of texcopy we don't know the format of the surface.
auto buffer = colourBufferCache.findFromAddress(addr);
if (buffer.has_value()) {
return buffer.value().get();
}
// Otherwise create and cache a new buffer.
ColourBuffer sampleBuffer(addr, format, width, height);
return colourBufferCache.add(sampleBuffer);
}
void RendererGL::screenshot(const std::string& name) {

View file

@ -9,4 +9,4 @@ void RendererNull::initGraphicsContext(SDL_Window* window) {}
void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {}
void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {}
void RendererNull::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {}
void RendererNull::screenshot(const std::string& name) {}
void RendererNull::screenshot(const std::string& name) {}

View file

@ -1,4 +1,5 @@
#include "services/gsp_gpu.hpp"
#include "PICA/regs.hpp"
#include "ipc.hpp"
#include "kernel.hpp"
@ -10,6 +11,7 @@ namespace ServiceCommands {
RegisterInterruptRelayQueue = 0x00130042,
WriteHwRegs = 0x00010082,
WriteHwRegsWithMask = 0x00020084,
SetBufferSwap = 0x00050200,
FlushDataCache = 0x00080082,
SetLCDForceBlack = 0x000B0040,
TriggerCmdReqQueue = 0x000C0000,
@ -19,16 +21,14 @@ namespace ServiceCommands {
}
// Commands written to shared memory and processed by TriggerCmdReqQueue
namespace GXCommands {
enum : u32 {
TriggerDMARequest = 0,
ProcessCommandList = 1,
MemoryFill = 2,
TriggerDisplayTransfer = 3,
TriggerTextureCopy = 4,
FlushCacheRegions = 5
};
}
enum class GXCommands : u32 {
TriggerDMARequest = 0,
ProcessCommandList = 1,
MemoryFill = 2,
TriggerDisplayTransfer = 3,
TriggerTextureCopy = 4,
FlushCacheRegions = 5
};
void GPUService::reset() {
privilegedProcess = 0xFFFFFFFF; // Set the privileged process to an invalid handle
@ -44,13 +44,14 @@ void GPUService::handleSyncRequest(u32 messagePointer) {
case ServiceCommands::FlushDataCache: flushDataCache(messagePointer); break;
case ServiceCommands::RegisterInterruptRelayQueue: registerInterruptRelayQueue(messagePointer); break;
case ServiceCommands::SetAxiConfigQoSMode: setAxiConfigQoSMode(messagePointer); break;
case ServiceCommands::SetBufferSwap: setBufferSwap(messagePointer); break;
case ServiceCommands::SetInternalPriorities: setInternalPriorities(messagePointer); break;
case ServiceCommands::SetLCDForceBlack: setLCDForceBlack(messagePointer); break;
case ServiceCommands::StoreDataCache: storeDataCache(messagePointer); break;
case ServiceCommands::TriggerCmdReqQueue: [[likely]] triggerCmdReqQueue(messagePointer); break;
case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break;
case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break;
; default: Helpers::panic("GPU service requested. Command: %08X\n", command);
default: Helpers::panic("GPU service requested. Command: %08X\n", command);
}
}
@ -124,15 +125,12 @@ void GPUService::requestInterrupt(GPUInterrupt type) {
// Not emulating this causes Yoshi's Wooly World, Captain Toad, Metroid 2 et al to hang
if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) {
int screen = static_cast<u32>(type) - static_cast<u32>(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom
constexpr u32 FBInfoSize = 0x40;
// TODO: Offset depends on GSP thread being triggered
u8* info = &sharedMem[0x200 + screen * FBInfoSize];
u8& dirtyFlag = info[1];
FrameBufferUpdate* update = reinterpret_cast<FrameBufferUpdate*>(&sharedMem[0x200 + screen * sizeof(FrameBufferUpdate)]);
if (dirtyFlag & 1) {
// TODO: Submit buffer info here
dirtyFlag &= ~1;
if (update->dirtyFlag & 1) {
setBufferSwapImpl(screen, update->framebufferInfo[update->index]);
update->dirtyFlag &= ~1;
}
}
@ -261,6 +259,18 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) {
mem.write32(messagePointer + 4, Result::Success);
}
void GPUService::setBufferSwap(u32 messagePointer) {
FramebufferInfo info{};
const u32 screenId = mem.read32(messagePointer + 4); // Selects either PDC0 or PDC1
info.activeFb = mem.read32(messagePointer + 8);
info.leftFramebufferVaddr = mem.read32(messagePointer + 12);
info.rightFramebufferVaddr = mem.read32(messagePointer + 16);
info.stride = mem.read32(messagePointer + 20);
info.format = mem.read32(messagePointer + 24);
info.displayFb = mem.read32(messagePointer + 28); // Selects either framebuffer A or B
setBufferSwapImpl(screenId, info);
}
// Seems to also be completely undocumented
void GPUService::setInternalPriorities(u32 messagePointer) {
log("GSP::GPU::SetInternalPriorities\n");
@ -283,7 +293,7 @@ void GPUService::processCommandBuffer() {
log("Processing %d GPU commands\n", commandsLeft);
while (commandsLeft != 0) {
u32 cmdID = cmd[0] & 0xff;
const GXCommands cmdID = static_cast<GXCommands>(cmd[0] & 0xff);
switch (cmdID) {
case GXCommands::ProcessCommandList: processCommandList(cmd); break;
case GXCommands::MemoryFill: memoryFill(cmd); break;
@ -375,12 +385,45 @@ void GPUService::flushCacheRegions(u32* cmd) {
log("GSP::GPU::FlushCacheRegions (Stubbed)\n");
}
void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) {
using namespace PICA::ExternalRegs;
constexpr static std::array<u32, 8> fb_addresses = {
Framebuffer0AFirstAddr,
Framebuffer0ASecondAddr,
Framebuffer0BFirstAddr,
Framebuffer0BSecondAddr,
Framebuffer1AFirstAddr,
Framebuffer1ASecondAddr,
Framebuffer1BFirstAddr,
Framebuffer1BSecondAddr,
};
const u32 fb_index = screenId * 4 + info.activeFb * 2;
gpu.writeExternalReg(fb_addresses[fb_index], VaddrToPaddr(info.leftFramebufferVaddr));
gpu.writeExternalReg(fb_addresses[fb_index + 1], VaddrToPaddr(info.rightFramebufferVaddr));
constexpr static std::array<u32, 6> config_addresses = {
Framebuffer0Config,
Framebuffer0Select,
Framebuffer0Stride,
Framebuffer1Config,
Framebuffer1Select,
Framebuffer1Stride,
};
const u32 config_index = screenId * 3;
gpu.writeExternalReg(config_addresses[config_index], info.format);
gpu.writeExternalReg(config_addresses[config_index + 1], info.displayFb);
gpu.writeExternalReg(config_addresses[config_index + 2], info.stride);
}
// Actually send command list (aka display list) to GPU
void GPUService::processCommandList(u32* cmd) {
const u32 address = cmd[1] & ~7; // Buffer address
const u32 size = cmd[2] & ~3; // Buffer size in bytes
const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
[[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
[[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size);
gpu.startCommandList(address, size);