mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 06:35:40 +12:00
Merge pull request #155 from GPUCode/moar-gpu
gpu: Better screen support (and other things to pander to the panders)
This commit is contained in:
commit
fc7bdc9158
27 changed files with 719 additions and 194 deletions
|
@ -49,6 +49,7 @@ include_directories(third_party/result/include)
|
|||
include_directories(third_party/xxhash/include)
|
||||
include_directories(third_party/httplib)
|
||||
include_directories(third_party/stb)
|
||||
include_directories(third_party/opengl)
|
||||
|
||||
add_compile_definitions(NOMINMAX) # Make windows.h not define min/max macros because third-party deps don't like it
|
||||
add_compile_definitions(WIN32_LEAN_AND_MEAN) # Make windows.h not include literally everything
|
||||
|
@ -176,7 +177,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp
|
|||
include/config.hpp include/services/ir_user.hpp include/http_server.hpp include/cheats.hpp
|
||||
include/action_replay.hpp include/renderer_sw/renderer_sw.hpp include/compiler_builtins.hpp
|
||||
include/fs/romfs.hpp include/fs/ivfc.hpp include/discord_rpc.hpp include/services/http.hpp include/result/result_cfg.hpp
|
||||
include/services/soc.hpp include/services/ssl.hpp
|
||||
include/math_util.hpp include/services/soc.hpp include/services/ssl.hpp
|
||||
)
|
||||
|
||||
set(THIRD_PARTY_SOURCE_FILES third_party/imgui/imgui.cpp
|
||||
|
@ -202,7 +203,8 @@ set(RENDERER_GL_SOURCE_FILES "") # Empty by default unless we are compiling with
|
|||
set(RENDERER_VK_SOURCE_FILES "") # Empty by default unless we are compiling with the VK renderer
|
||||
|
||||
if(ENABLE_OPENGL)
|
||||
set(RENDERER_GL_INCLUDE_FILES include/renderer_gl/opengl.hpp
|
||||
# This may look weird but opengl.hpp is our header even if it's in the third_party folder
|
||||
set(RENDERER_GL_INCLUDE_FILES third_party/opengl/opengl.hpp
|
||||
include/renderer_gl/renderer_gl.hpp include/renderer_gl/textures.hpp
|
||||
include/renderer_gl/surfaces.hpp include/renderer_gl/surface_cache.hpp
|
||||
include/renderer_gl/gl_state.hpp
|
||||
|
|
|
@ -14,8 +14,11 @@
|
|||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
static constexpr u32 extRegNum = 0x1000;
|
||||
|
||||
using vec4f = std::array<Floats::f24, 4>;
|
||||
using Registers = std::array<u32, regNum>;
|
||||
using Registers = std::array<u32, regNum>; // Internal registers (named registers in short since they're the main ones)
|
||||
using ExternalRegisters = std::array<u32, extRegNum>;
|
||||
|
||||
Memory& mem;
|
||||
EmulatorConfig& config;
|
||||
|
@ -91,12 +94,16 @@ class GPU {
|
|||
void reset();
|
||||
|
||||
Registers& getRegisters() { return regs; }
|
||||
ExternalRegisters& getExtRegisters() { return externalRegs; }
|
||||
void startCommandList(u32 addr, u32 size);
|
||||
|
||||
// Used by the GSP GPU service for readHwRegs/writeHwRegs/writeHwRegsMasked
|
||||
u32 readReg(u32 address);
|
||||
void writeReg(u32 address, u32 value);
|
||||
|
||||
u32 readExternalReg(u32 index);
|
||||
void writeExternalReg(u32 index, u32 value);
|
||||
|
||||
// Used when processing GPU command lists
|
||||
u32 readInternalReg(u32 index);
|
||||
void writeInternalReg(u32 index, u32 value, u32 mask);
|
||||
|
@ -111,6 +118,10 @@ class GPU {
|
|||
renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags);
|
||||
}
|
||||
|
||||
// Hands a texture copy request over to the active renderer backend.
// All six arguments are passed through untouched, in the same order.
void textureCopy(
	u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags
) {
	return renderer->textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags);
}
|
||||
|
||||
// Read a value of type T from physical address paddr
|
||||
// This is necessary because vertex attribute fetching uses physical addresses
|
||||
template <typename T>
|
||||
|
@ -140,4 +151,10 @@ class GPU {
|
|||
Helpers::panic("[GPU] Tried to access unknown physical address: %08X", paddr);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// GPU external registers
|
||||
// We keep them at the end of the struct for cache locality reasons. Tl;dr we want the more commonly used things to be packed at the start
|
||||
// of the struct, instead of externalRegs being in the middle
|
||||
ExternalRegisters externalRegs;
|
||||
};
|
||||
|
|
|
@ -22,6 +22,7 @@ namespace PICA {
|
|||
ShaderOutputCount = 0x4F,
|
||||
ShaderOutmap0 = 0x50,
|
||||
|
||||
ViewportXY = 0x68,
|
||||
DepthmapEnable = 0x6D,
|
||||
|
||||
// Texture registers
|
||||
|
@ -178,6 +179,53 @@ namespace PICA {
|
|||
};
|
||||
}
|
||||
|
||||
namespace ExternalRegs {
|
||||
enum : u32 {
|
||||
MemFill1BufferStartPaddr = 0x3,
|
||||
MemFill1BufferEndPAddr = 0x4,
|
||||
MemFill1Value = 0x5,
|
||||
MemFill1Control = 0x6,
|
||||
MemFill2BufferStartPaddr = 0x7,
|
||||
MemFill2BufferEndPAddr = 0x8,
|
||||
MemFill2Value = 0x9,
|
||||
MemFill2Control = 0xA,
|
||||
VramBankControl = 0xB,
|
||||
GPUBusy = 0xC,
|
||||
BacklightControl = 0xBC,
|
||||
Framebuffer0Size = 0x118,
|
||||
Framebuffer0AFirstAddr = 0x119,
|
||||
Framebuffer0ASecondAddr = 0x11A,
|
||||
Framebuffer0Config = 0x11B,
|
||||
Framebuffer0Select = 0x11D,
|
||||
Framebuffer0Stride = 0x123,
|
||||
Framebuffer0BFirstAddr = 0x124,
|
||||
Framebuffer0BSecondAddr = 0x125,
|
||||
Framebuffer1Size = 0x156,
|
||||
Framebuffer1AFirstAddr = 0x159,
|
||||
Framebuffer1ASecondAddr = 0x15A,
|
||||
Framebuffer1Config = 0x15B,
|
||||
Framebuffer1Select = 0x15D,
|
||||
Framebuffer1Stride = 0x163,
|
||||
Framebuffer1BFirstAddr = 0x164,
|
||||
Framebuffer1BSecondAddr = 0x165,
|
||||
TransferInputPAddr = 0x2FF,
|
||||
TransferOutputPAddr = 0x300,
|
||||
DisplayTransferOutputDim = 0x301,
|
||||
DisplayTransferInputDim = 0x302,
|
||||
TransferFlags = 0x303,
|
||||
TransferTrigger = 0x305,
|
||||
TextureCopyTotalBytes = 0x307,
|
||||
TextureCopyInputLineGap = 0x308,
|
||||
TextureCopyOutputLineGap = 0x309,
|
||||
};
|
||||
}
|
||||
|
||||
// Scaling mode selector. The numeric values are spelled out explicitly so they stay fixed
// if enumerators are ever reordered.
enum class Scaling : u32 {
	None = 0,  // No scaling
	X = 1,     // Scaling along X only (going by the name, TODO confirm exact semantics)
	XY = 2,    // Scaling along both X and Y (going by the name, TODO confirm exact semantics)
};
|
||||
|
||||
namespace Lights {
|
||||
enum : u32 {
|
||||
LUT_D0 = 0,
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "termcolor.hpp"
|
||||
|
||||
|
@ -30,6 +31,17 @@ using s32 = std::int32_t;
|
|||
using s64 = std::int64_t;
|
||||
|
||||
namespace Helpers {
|
||||
// Builds a std::string from a printf-style format string and its arguments.
// fmt: printf-style format string
// args: values consumed by the conversion specifiers in fmt (passed on to snprintf as-is)
// Returns the formatted text, or an empty string if snprintf reports an encoding error.
template <class... Args>
std::string format(const std::string& fmt, Args&&... args) {
	// First pass: with a null buffer snprintf only reports how many characters are needed (terminator excluded)
	const int length = std::snprintf(nullptr, 0, fmt.c_str(), args...);
	if (length < 0) {
		return {};
	}

	// Second pass: format straight into the string's own storage instead of going through a temporary
	// heap buffer. The string always owns length + 1 chars, so snprintf's terminating NUL lands on the
	// string's own terminator.
	const auto count = static_cast<std::size_t>(length);
	std::string result(count, '\0');
	std::snprintf(result.data(), count + 1, fmt.c_str(), args...);
	return result;
}
|
||||
|
||||
// Unconditional panic, unlike panicDev which does not panic on user builds
|
||||
template <class... Args>
|
||||
[[noreturn]] static void panic(const char* fmt, Args&&... args) {
|
||||
|
|
73
include/math_util.hpp
Normal file
73
include/math_util.hpp
Normal file
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project / 2023 Panda3DS Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Math {

	// Rectangle described by its four edge coordinates.
	// No ordering between left/right or top/bottom is enforced: getWidth/getHeight return absolute extents,
	// so a vertically flipped rectangle (top > bottom) is still usable.
	template <class T>
	struct Rectangle {
		T left{};
		T top{};
		T right{};
		T bottom{};

		constexpr Rectangle() = default;

		constexpr Rectangle(T left, T top, T right, T bottom) : left(left), top(top), right(right), bottom(bottom) {}

		[[nodiscard]] constexpr bool operator==(const Rectangle<T>& rhs) const {
			return (left == rhs.left) && (top == rhs.top) && (right == rhs.right) && (bottom == rhs.bottom);
		}

		[[nodiscard]] constexpr bool operator!=(const Rectangle<T>& rhs) const { return !operator==(rhs); }

		// Multiplies all four edges by value
		[[nodiscard]] constexpr Rectangle<T> operator*(const T value) const {
			return Rectangle{left * value, top * value, right * value, bottom * value};
		}

		// Divides all four edges by value
		[[nodiscard]] constexpr Rectangle<T> operator/(const T value) const {
			return Rectangle{left / value, top / value, right / value, bottom / value};
		}

		// Absolute horizontal extent, |right - left|
		[[nodiscard]] T getWidth() const { return extent(left, right); }

		// Absolute vertical extent, |bottom - top|
		[[nodiscard]] T getHeight() const { return extent(top, bottom); }

		[[nodiscard]] T getArea() const { return getWidth() * getHeight(); }

		// Returns a copy shifted horizontally by x
		[[nodiscard]] Rectangle<T> translateX(const T x) const { return Rectangle{left + x, top, right + x, bottom}; }

		// Returns a copy shifted vertically by y
		[[nodiscard]] Rectangle<T> translateY(const T y) const { return Rectangle{left, top + y, right, bottom + y}; }

		// Returns a copy that keeps left/top and whose width and height are multiplied by s
		[[nodiscard]] Rectangle<T> scale(const float s) const {
			return Rectangle{left, top, static_cast<T>(left + getWidth() * s), static_cast<T>(top + getHeight() * s)};
		}

	  private:
		// Absolute distance between two edge coordinates.
		// Integral types go through the signed counterpart so that unsigned wrap-around (to < from) still
		// yields the magnitude. std::make_signed_t is only valid for integral types, so floating-point
		// types take a separate branch.
		[[nodiscard]] static T extent(const T from, const T to) {
			if constexpr (std::is_floating_point_v<T>) {
				return (to < from) ? (from - to) : (to - from);
			} else {
				return std::abs(static_cast<std::make_signed_t<T>>(to - from));
			}
		}
	};

	template <typename T>
	Rectangle(T, T, T, T) -> Rectangle<T>;

	template <typename T>
	using Rect = Rectangle<T>;

}  // end namespace Math
|
|
@ -261,4 +261,4 @@ public:
|
|||
|
||||
void setVRAM(u8* pointer) { vram = pointer; }
|
||||
bool allocateMainThreadStack(u32 size);
|
||||
};
|
||||
};
|
||||
|
|
|
@ -21,8 +21,11 @@ struct SDL_Window;
|
|||
class Renderer {
|
||||
protected:
|
||||
GPU& gpu;
|
||||
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
|
||||
static constexpr u32 regNum = 0x300; // Number of internal PICA registers
|
||||
static constexpr u32 extRegNum = 0x1000; // Number of external PICA registers
|
||||
|
||||
const std::array<u32, regNum>& regs;
|
||||
const std::array<u32, extRegNum>& externalRegs;
|
||||
|
||||
std::array<u32, 2> fbSize; // The size of the framebuffer (ie both the colour and depth buffer)'
|
||||
|
||||
|
@ -34,7 +37,7 @@ class Renderer {
|
|||
PICA::DepthFmt depthBufferFormat;
|
||||
|
||||
public:
|
||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs);
|
||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
virtual ~Renderer();
|
||||
|
||||
static constexpr u32 vertexBufferSize = 0x10000;
|
||||
|
@ -46,6 +49,7 @@ class Renderer {
|
|||
virtual void initGraphicsContext(SDL_Window* window) = 0; // Initialize graphics context
|
||||
virtual void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) = 0; // Clear a GPU buffer in VRAM
|
||||
virtual void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) = 0; // Perform display transfer
|
||||
virtual void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) = 0;
|
||||
virtual void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) = 0; // Draw the given vertices
|
||||
|
||||
virtual void screenshot(const std::string& name) = 0;
|
||||
|
@ -65,4 +69,4 @@ class Renderer {
|
|||
|
||||
void setColourBufferLoc(u32 loc) { colourBufferLoc = loc; }
|
||||
void setDepthBufferLoc(u32 loc) { depthBufferLoc = loc; }
|
||||
};
|
||||
};
|
||||
|
|
|
@ -34,6 +34,8 @@ struct GLStateManager {
|
|||
bool redMask, greenMask, blueMask, alphaMask;
|
||||
bool depthMask;
|
||||
|
||||
float clearRed, clearBlue, clearGreen, clearAlpha;
|
||||
|
||||
GLuint stencilMask;
|
||||
GLuint boundVAO;
|
||||
GLuint boundVBO;
|
||||
|
@ -44,6 +46,7 @@ struct GLStateManager {
|
|||
|
||||
void reset();
|
||||
void resetBlend();
|
||||
void resetClearing();
|
||||
void resetClipping();
|
||||
void resetColourMask();
|
||||
void resetDepth();
|
||||
|
@ -209,6 +212,17 @@ struct GLStateManager {
|
|||
}
|
||||
}
|
||||
|
||||
void setClearColour(float r, float g, float b, float a) {
|
||||
if (clearRed != r || clearGreen != g || clearBlue != b || clearAlpha != a) {
|
||||
clearRed = r;
|
||||
clearGreen = g;
|
||||
clearBlue = b;
|
||||
clearAlpha = a;
|
||||
|
||||
OpenGL::setClearColor(r, g, b, a);
|
||||
}
|
||||
}
|
||||
|
||||
void setDepthFunc(OpenGL::DepthFunc func) { setDepthFunc(static_cast<GLenum>(func)); }
|
||||
};
|
||||
|
||||
|
|
|
@ -44,8 +44,8 @@ class RendererGL final : public Renderer {
|
|||
float oldDepthOffset = 0.0;
|
||||
bool oldDepthmapEnable = false;
|
||||
|
||||
SurfaceCache<DepthBuffer, 10, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 10, true> colourBufferCache;
|
||||
SurfaceCache<DepthBuffer, 16, true> depthBufferCache;
|
||||
SurfaceCache<ColourBuffer, 16, true> colourBufferCache;
|
||||
SurfaceCache<Texture, 256, true> textureCache;
|
||||
|
||||
// Dummy VAO/VBO for blitting the final output
|
||||
|
@ -68,7 +68,8 @@ class RendererGL final : public Renderer {
|
|||
void updateLightingLUT();
|
||||
|
||||
public:
|
||||
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs) : Renderer(gpu, internalRegs) {}
|
||||
RendererGL(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
~RendererGL() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -76,8 +77,11 @@ class RendererGL final : public Renderer {
|
|||
void initGraphicsContext(SDL_Window* window) override; // Initialize graphics context
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override; // Clear a GPU buffer in VRAM
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override; // Perform display transfer
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override; // Draw the given vertices
|
||||
|
||||
std::optional<ColourBuffer> getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound = true);
|
||||
|
||||
// Take a screenshot of the screen and store it in a file
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
|
|
|
@ -76,6 +76,16 @@ public:
|
|||
|
||||
size++;
|
||||
|
||||
// Find an existing surface we completely invalidate and overwrite it with the new surface
|
||||
for (auto& e : buffer) {
|
||||
if (e.valid && e.range.lower() >= surface.range.lower() && e.range.upper() <= surface.range.upper()) {
|
||||
e.free();
|
||||
e = surface;
|
||||
e.allocate();
|
||||
return e;
|
||||
}
|
||||
}
|
||||
|
||||
// Find an invalid entry in the cache and overwrite it with the new surface
|
||||
for (auto& e : buffer) {
|
||||
if (!e.valid) {
|
||||
|
|
|
@ -2,62 +2,70 @@
|
|||
#include "PICA/regs.hpp"
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "opengl.hpp"
|
||||
|
||||
template <typename T>
|
||||
using Interval = boost::icl::right_open_interval<T>;
|
||||
|
||||
struct ColourBuffer {
|
||||
u32 location;
|
||||
PICA::ColorFmt format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
u32 location;
|
||||
PICA::ColorFmt format;
|
||||
OpenGL::uvec2 size;
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL resources allocated to buffer
|
||||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL resources allocated to buffer
|
||||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
|
||||
ColourBuffer() : valid(false) {}
|
||||
ColourBuffer() : valid(false) {}
|
||||
|
||||
ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true)
|
||||
: location(loc), format(format), size({x, y}), valid(valid) {
|
||||
ColourBuffer(u32 loc, PICA::ColorFmt format, u32 x, u32 y, bool valid = true) : location(loc), format(format), size({x, y}), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
void allocate() {
|
||||
// Create texture for the FBO, setting up filters and the like
|
||||
// Reading back the current texture is slow, but allocate calls should be few and far between.
|
||||
// If this becomes a bottleneck, we can fix it semi-easily
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
texture.create(size.x(), size.y(), GL_RGBA8);
|
||||
texture.bind();
|
||||
texture.setMinFilter(OpenGL::Linear);
|
||||
texture.setMagFilter(OpenGL::Linear);
|
||||
glBindTexture(GL_TEXTURE_2D, prevTexture);
|
||||
|
||||
void allocate() {
|
||||
// Create texture for the FBO, setting up filters and the like
|
||||
// Reading back the current texture is slow, but allocate calls should be few and far between.
|
||||
// If this becomes a bottleneck, we can fix it semi-easily
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
texture.create(size.x(), size.y(), GL_RGBA8);
|
||||
texture.bind();
|
||||
texture.setMinFilter(OpenGL::Linear);
|
||||
texture.setMagFilter(OpenGL::Linear);
|
||||
glBindTexture(GL_TEXTURE_2D, prevTexture);
|
||||
#ifdef GPU_DEBUG_INFO
|
||||
const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location);
|
||||
OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str());
|
||||
#endif
|
||||
|
||||
//Helpers::panic("Creating FBO: %d, %d\n", size.x(), size.y());
|
||||
fbo.createWithDrawTexture(texture);
|
||||
fbo.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
|
||||
fbo.createWithDrawTexture(texture);
|
||||
fbo.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
|
||||
Helpers::panic("Incomplete framebuffer");
|
||||
}
|
||||
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
|
||||
Helpers::panic("Incomplete framebuffer");
|
||||
// TODO: This should not clear the framebuffer contents. It should load them from VRAM.
|
||||
GLint oldViewport[4];
|
||||
GLfloat oldClearColour[4];
|
||||
|
||||
// TODO: This should not clear the framebuffer contents. It should load them from VRAM.
|
||||
GLint oldViewport[4];
|
||||
glGetIntegerv(GL_VIEWPORT, oldViewport);
|
||||
OpenGL::setViewport(size.x(), size.y());
|
||||
OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0);
|
||||
OpenGL::clearColor();
|
||||
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
|
||||
}
|
||||
glGetIntegerv(GL_VIEWPORT, oldViewport);
|
||||
glGetFloatv(GL_COLOR_CLEAR_VALUE, oldClearColour);
|
||||
|
||||
void free() {
|
||||
OpenGL::setViewport(size.x(), size.y());
|
||||
OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0);
|
||||
OpenGL::clearColor();
|
||||
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
|
||||
OpenGL::setClearColor(oldClearColour[0], oldClearColour[1], oldClearColour[2], oldClearColour[3]);
|
||||
}
|
||||
|
||||
void free() {
|
||||
valid = false;
|
||||
|
||||
if (texture.exists() || fbo.exists()) {
|
||||
|
@ -66,88 +74,102 @@ struct ColourBuffer {
|
|||
}
|
||||
}
|
||||
|
||||
bool matches(ColourBuffer& other) {
|
||||
return location == other.location && format == other.format &&
|
||||
size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
// Computes the rectangle of this buffer's texture that corresponds to a width x height region
// starting at inputAddress.
// inputAddress: start address of the region. It is used relative to this buffer's location
// width, height: size of the region in pixels
Math::Rect<u32> getSubRect(u32 inputAddress, u32 width, u32 height) {
	// Distance from the start of the buffer to the start of the region, in pixels
	const u32 pixelOffset = (inputAddress - location) / sizePerPixel(format);

	// The offset is split in units of (buffer width * 8) pixels, and x/y move in steps of 8.
	// Presumably this reflects 8x8 tiled storage, TODO confirm
	const u32 pixelsPerBand = size.x() * 8;
	const u32 x0 = (pixelOffset % pixelsPerBand) / 8;
	const u32 y0 = (pixelOffset / pixelsPerBand) * 8;

	// PICA textures have a top-left origin while OpenGL uses a bottom-left one, so the y coordinates
	// are measured from the opposite edge of the buffer
	const u32 flippedTop = size.y() - y0;
	const u32 flippedBottom = size.y() - height - y0;

	return Math::Rect<u32>{x0, flippedTop, x0 + width, flippedBottom};
}
|
||||
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format);
|
||||
}
|
||||
bool matches(ColourBuffer& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format);
|
||||
}
|
||||
};
|
||||
|
||||
struct DepthBuffer {
|
||||
u32 location;
|
||||
PICA::DepthFmt format;
|
||||
OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
|
||||
bool valid;
|
||||
u32 location;
|
||||
PICA::DepthFmt format;
|
||||
OpenGL::uvec2 size; // Implicitly set to the size of the framebuffer
|
||||
bool valid;
|
||||
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL texture used for storing depth/stencil
|
||||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
// Range of VRAM taken up by buffer
|
||||
Interval<u32> range;
|
||||
// OpenGL texture used for storing depth/stencil
|
||||
OpenGL::Texture texture;
|
||||
OpenGL::Framebuffer fbo;
|
||||
|
||||
DepthBuffer() : valid(false) {}
|
||||
DepthBuffer() : valid(false) {}
|
||||
|
||||
DepthBuffer(u32 loc, PICA::DepthFmt format, u32 x, u32 y, bool valid = true) :
|
||||
location(loc), format(format), size({x, y}), valid(valid) {
|
||||
DepthBuffer(u32 loc, PICA::DepthFmt format, u32 x, u32 y, bool valid = true) : location(loc), format(format), size({x, y}), valid(valid) {
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
|
||||
u64 endLoc = (u64)loc + sizeInBytes();
|
||||
// Check if start and end are valid here
|
||||
range = Interval<u32>(loc, (u32)endLoc);
|
||||
}
|
||||
void allocate() {
|
||||
// Create texture for the FBO, setting up filters and the like
|
||||
// Reading back the current texture is slow, but allocate calls should be few and far between.
|
||||
// If this becomes a bottleneck, we can fix it semi-easily
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
|
||||
void allocate() {
|
||||
// Create texture for the FBO, setting up filters and the like
|
||||
// Reading back the current texture is slow, but allocate calls should be few and far between.
|
||||
// If this becomes a bottleneck, we can fix it semi-easily
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
// Internal formats for the texture based on format
|
||||
static constexpr std::array<GLenum, 4> internalFormats = {
|
||||
GL_DEPTH_COMPONENT16,
|
||||
GL_DEPTH_COMPONENT24,
|
||||
GL_DEPTH_COMPONENT24,
|
||||
GL_DEPTH24_STENCIL8,
|
||||
};
|
||||
|
||||
// Internal formats for the texture based on format
|
||||
static constexpr std::array<GLenum, 4> internalFormats = {
|
||||
GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT24, GL_DEPTH24_STENCIL8
|
||||
};
|
||||
// Format of the texture
|
||||
static constexpr std::array<GLenum, 4> formats = {
|
||||
GL_DEPTH_COMPONENT,
|
||||
GL_DEPTH_COMPONENT,
|
||||
GL_DEPTH_COMPONENT,
|
||||
GL_DEPTH_STENCIL,
|
||||
};
|
||||
|
||||
static constexpr std::array<GLenum, 4> types = {
|
||||
GL_UNSIGNED_SHORT,
|
||||
GL_UNSIGNED_INT,
|
||||
GL_UNSIGNED_INT,
|
||||
GL_UNSIGNED_INT_24_8,
|
||||
};
|
||||
|
||||
// Format of the texture
|
||||
static constexpr std::array<GLenum, 4> formats = {
|
||||
GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL
|
||||
};
|
||||
auto internalFormat = internalFormats[(int)format];
|
||||
auto fmt = formats[(int)format];
|
||||
auto type = types[(int)format];
|
||||
|
||||
static constexpr std::array<GLenum, 4> types = {
|
||||
GL_UNSIGNED_SHORT, GL_UNSIGNED_INT, GL_UNSIGNED_INT, GL_UNSIGNED_INT_24_8
|
||||
};
|
||||
texture.createDSTexture(size.x(), size.y(), internalFormat, fmt, nullptr, type, GL_TEXTURE_2D);
|
||||
texture.bind();
|
||||
texture.setMinFilter(OpenGL::Nearest);
|
||||
texture.setMagFilter(OpenGL::Nearest);
|
||||
|
||||
auto internalFormat = internalFormats[(int)format];
|
||||
auto fmt = formats[(int)format];
|
||||
auto type = types[(int)format];
|
||||
glBindTexture(GL_TEXTURE_2D, prevTexture);
|
||||
fbo.createWithDrawTexture(texture, fmt == GL_DEPTH_STENCIL ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT);
|
||||
|
||||
texture.createDSTexture(size.x(), size.y(), internalFormat, fmt, nullptr, type, GL_TEXTURE_2D);
|
||||
texture.bind();
|
||||
texture.setMinFilter(OpenGL::Nearest);
|
||||
texture.setMagFilter(OpenGL::Nearest);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, prevTexture);
|
||||
|
||||
fbo.createWithDrawTexture(texture, fmt == GL_DEPTH_STENCIL ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT);
|
||||
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
|
||||
Helpers::panic("Incomplete framebuffer");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void free() {
|
||||
void free() {
|
||||
valid = false;
|
||||
if (texture.exists()) {
|
||||
texture.free();
|
||||
}
|
||||
}
|
||||
|
||||
bool matches(DepthBuffer& other) {
|
||||
return location == other.location && format == other.format &&
|
||||
size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
bool matches(DepthBuffer& other) {
|
||||
return location == other.location && format == other.format && size.x() == other.size.x() && size.y() == other.size.y();
|
||||
}
|
||||
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format);
|
||||
}
|
||||
size_t sizeInBytes() {
|
||||
return (size_t)size.x() * (size_t)size.y() * PICA::sizePerPixel(format);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include "PICA/regs.hpp"
|
||||
#include "boost/icl/interval.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "math_util.hpp"
|
||||
#include "opengl.hpp"
|
||||
|
||||
template <typename T>
|
||||
|
@ -53,7 +54,7 @@ struct Texture {
|
|||
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||
|
||||
// Returns the format of this texture as a string
|
||||
std::string formatToString() {
|
||||
std::string_view formatToString() {
|
||||
return PICA::textureFormatToString(format);
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ class GPU;
|
|||
|
||||
class RendererNull final : public Renderer {
|
||||
public:
|
||||
RendererNull(GPU& gpu, const std::array<u32, regNum>& internalRegs);
|
||||
RendererNull(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
~RendererNull() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -12,6 +12,7 @@ class RendererNull final : public Renderer {
|
|||
void initGraphicsContext(SDL_Window* window) override;
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -4,7 +4,7 @@ class GPU;
|
|||
|
||||
class RendererSw final : public Renderer {
|
||||
public:
|
||||
RendererSw(GPU& gpu, const std::array<u32, regNum>& internalRegs);
|
||||
RendererSw(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
~RendererSw() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -12,6 +12,7 @@ class RendererSw final : public Renderer {
|
|||
void initGraphicsContext(SDL_Window* window) override;
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -44,7 +44,7 @@ class RendererVK final : public Renderer {
|
|||
|
||||
u64 currentFrame = 0;
|
||||
public:
|
||||
RendererVK(GPU& gpu, const std::array<u32, regNum>& internalRegs);
|
||||
RendererVK(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs);
|
||||
~RendererVK() override;
|
||||
|
||||
void reset() override;
|
||||
|
@ -52,6 +52,7 @@ class RendererVK final : public Renderer {
|
|||
void initGraphicsContext(SDL_Window* window) override;
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) override;
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) override;
|
||||
void drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) override;
|
||||
void screenshot(const std::string& name) override;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -40,11 +40,32 @@ class GPUService {
|
|||
MAKE_LOG_FUNCTION(log, gspGPULogger)
|
||||
void processCommandBuffer();
|
||||
|
||||
struct FramebufferInfo {
|
||||
u32 activeFb;
|
||||
u32 leftFramebufferVaddr;
|
||||
u32 rightFramebufferVaddr;
|
||||
u32 stride;
|
||||
u32 format;
|
||||
u32 displayFb;
|
||||
u32 attribute;
|
||||
};
|
||||
static_assert(sizeof(FramebufferInfo) == 28, "GSP::GPU::FramebufferInfo has the wrong size");
|
||||
|
||||
struct FramebufferUpdate {
|
||||
u8 index;
|
||||
u8 dirtyFlag;
|
||||
u16 pad0;
|
||||
std::array<FramebufferInfo, 2> framebufferInfo;
|
||||
u32 pad1;
|
||||
};
|
||||
static_assert(sizeof(FramebufferUpdate) == 64, "GSP::GPU::FramebufferUpdate has the wrong size");
|
||||
|
||||
// Service commands
|
||||
void acquireRight(u32 messagePointer);
|
||||
void flushDataCache(u32 messagePointer);
|
||||
void registerInterruptRelayQueue(u32 messagePointer);
|
||||
void setAxiConfigQoSMode(u32 messagePointer);
|
||||
void setBufferSwap(u32 messagePointer);
|
||||
void setInternalPriorities(u32 messagePointer);
|
||||
void setLCDForceBlack(u32 messagePointer);
|
||||
void storeDataCache(u32 messagePointer);
|
||||
|
@ -60,6 +81,8 @@ class GPUService {
|
|||
void triggerTextureCopy(u32* cmd);
|
||||
void flushCacheRegions(u32* cmd);
|
||||
|
||||
void setBufferSwapImpl(u32 screen_id, const FramebufferInfo& info);
|
||||
|
||||
public:
|
||||
GPUService(Memory& mem, GPU& gpu, Kernel& kernel, u32& currentPID) : mem(mem), gpu(gpu),
|
||||
kernel(kernel), currentPID(currentPID) {}
|
||||
|
@ -72,4 +95,4 @@ public:
|
|||
std::memset(ptr, 0, 0x1000);
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
#include "renderer_vk/renderer_vk.hpp"
|
||||
#endif
|
||||
|
||||
// Screen dimensions in pixels, used for the reset values of the framebuffer size registers,
// which are packed as (height << 16) | width.
// "Width" is the 240-pixel edge for both screens; "height" is the long edge.
constexpr u32 topScreenWidth = 240;
constexpr u32 topScreenHeight = 400;

// The 3DS bottom screen is 320x240, so its long edge is 320 pixels (this used to read 300)
constexpr u32 bottomScreenWidth = 240;
constexpr u32 bottomScreenHeight = 320;
|
||||
|
||||
using namespace Floats;
|
||||
|
||||
// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it
|
||||
|
@ -26,24 +32,24 @@ GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
|
|||
|
||||
switch (config.rendererType) {
|
||||
case RendererType::Null: {
|
||||
renderer.reset(new RendererNull(*this, regs));
|
||||
renderer.reset(new RendererNull(*this, regs, externalRegs));
|
||||
break;
|
||||
}
|
||||
|
||||
case RendererType::Software: {
|
||||
renderer.reset(new RendererSw(*this, regs));
|
||||
renderer.reset(new RendererSw(*this, regs, externalRegs));
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef PANDA3DS_ENABLE_OPENGL
|
||||
case RendererType::OpenGL: {
|
||||
renderer.reset(new RendererGL(*this, regs));
|
||||
renderer.reset(new RendererGL(*this, regs, externalRegs));
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef PANDA3DS_ENABLE_VULKAN
|
||||
case RendererType::Vulkan: {
|
||||
renderer.reset(new RendererVK(*this, regs));
|
||||
renderer.reset(new RendererVK(*this, regs, externalRegs));
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
@ -78,6 +84,27 @@ void GPU::reset() {
|
|||
e.config2 = 0;
|
||||
}
|
||||
|
||||
// Initialize the framebuffer registers. Values taken from Citra.
|
||||
|
||||
using namespace PICA::ExternalRegs;
|
||||
// Top screen addresses and dimensions.
|
||||
externalRegs[Framebuffer0AFirstAddr] = 0x181E6000;
|
||||
externalRegs[Framebuffer0ASecondAddr] = 0x1822C800;
|
||||
externalRegs[Framebuffer0BFirstAddr] = 0x18273000;
|
||||
externalRegs[Framebuffer0BSecondAddr] = 0x182B9800;
|
||||
externalRegs[Framebuffer0Size] = (topScreenHeight << 16) | topScreenWidth;
|
||||
externalRegs[Framebuffer0Stride] = 720;
|
||||
externalRegs[Framebuffer0Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
|
||||
externalRegs[Framebuffer0Select] = 0;
|
||||
|
||||
// Bottom screen addresses and dimensions.
|
||||
externalRegs[Framebuffer1AFirstAddr] = 0x1848F000;
|
||||
externalRegs[Framebuffer1ASecondAddr] = 0x184C7800;
|
||||
externalRegs[Framebuffer1Size] = (bottomScreenHeight << 16) | bottomScreenWidth;
|
||||
externalRegs[Framebuffer1Stride] = 720;
|
||||
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
|
||||
externalRegs[Framebuffer1Select] = 0;
|
||||
|
||||
renderer->reset();
|
||||
}
|
||||
|
||||
|
@ -321,15 +348,17 @@ PICA::Vertex GPU::getImmediateModeVertex() {
|
|||
|
||||
// Run VS and return vertex data. TODO: Don't hardcode offsets for each attribute
|
||||
shaderUnit.vs.run();
|
||||
std::memcpy(&v.s.positions, &shaderUnit.vs.outputs[0], sizeof(vec4f));
|
||||
std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f));
|
||||
std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
|
||||
// Map shader outputs to fixed function properties
|
||||
const u32 totalShaderOutputs = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||
for (int i = 0; i < totalShaderOutputs; i++) {
|
||||
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
|
||||
|
||||
printf(
|
||||
"(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]
|
||||
);
|
||||
printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], (double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]);
|
||||
printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]);
|
||||
for (int j = 0; j < 4; j++) { // pls unroll
|
||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||
v.raw[mapping] = shaderUnit.vs.outputs[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
|
|
@ -19,11 +19,36 @@ void GPU::writeReg(u32 address, u32 value) {
|
|||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
writeInternalReg(index, value, 0xffffffff);
|
||||
} else if (address >= 0x1EF00004 && address < 0x1EF01000) {
|
||||
const u32 index = (address - 0x1EF00004) / sizeof(u32);
|
||||
writeExternalReg(index, value);
|
||||
} else {
|
||||
log("Ignoring write to external GPU register %08X. Value: %08X\n", address, value);
|
||||
log("Ignoring write to unknown GPU register %08X. Value: %08X\n", address, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the value of the external GPU register at the given word index.
// An out-of-range index panics; the -1 return is only reached if panic returns.
u32 GPU::readExternalReg(u32 index) {
	using namespace PICA::ExternalRegs;

	// Bound the index by the array's real size. The old `index > 0x1000` test hard-coded the limit and,
	// being a strict `>`, let index 0x1000 through to an out-of-bounds access for an array of 0x1000 entries.
	if (index >= externalRegs.size()) [[unlikely]] {
		Helpers::panic("Tried to read invalid external GPU register. Index: %X\n", index);
		return -1;
	}

	return externalRegs[index];
}
|
||||
|
||||
// Stores value into the external GPU register at the given word index.
// An out-of-range index panics and nothing is written.
void GPU::writeExternalReg(u32 index, u32 value) {
	using namespace PICA::ExternalRegs;

	// Bound the index by the array's real size. The old `index > 0x1000` test hard-coded the limit and,
	// being a strict `>`, let index 0x1000 through to an out-of-bounds write for an array of 0x1000 entries.
	if (index >= externalRegs.size()) [[unlikely]] {
		Helpers::panic("Tried to write to invalid external GPU register. Index: %X, value: %08X\n", index, value);
		return;
	}

	externalRegs[index] = value;
}
|
||||
|
||||
u32 GPU::readInternalReg(u32 index) {
|
||||
using namespace PICA::InternalRegs;
|
||||
|
||||
|
@ -384,4 +409,4 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
writeInternalReg(id, param, mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,15 @@ void GLStateManager::resetBlend() {
|
|||
OpenGL::setLogicOp(GL_COPY);
|
||||
}
|
||||
|
||||
// Resets the cached clear colour to opaque black and pushes it to OpenGL.
void GLStateManager::resetClearing() {
	clearRed = 0.f;
	clearGreen = 0.f;
	clearBlue = 0.f;
	clearAlpha = 1.f;

	// setClearColor takes (r, g, b, a). Green and blue used to be passed swapped here, which only went
	// unnoticed because both are 0 after a reset.
	OpenGL::setClearColor(clearRed, clearGreen, clearBlue, clearAlpha);
}
|
||||
|
||||
void GLStateManager::resetClipping() {
|
||||
// Disable all (supported) clip planes
|
||||
enabledClipPlanes = 0;
|
||||
|
@ -64,6 +73,7 @@ void GLStateManager::resetProgram() {
|
|||
|
||||
void GLStateManager::reset() {
|
||||
resetBlend();
|
||||
resetClearing();
|
||||
resetClipping();
|
||||
resetColourMask();
|
||||
resetDepth();
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "math_util.hpp"
|
||||
|
||||
CMRC_DECLARE(RendererGL);
|
||||
|
||||
|
@ -134,13 +135,15 @@ void RendererGL::initGraphicsContext(SDL_Window* window) {
|
|||
screenFramebuffer.createWithDrawTexture(screenTexture);
|
||||
screenFramebuffer.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) Helpers::panic("Incomplete framebuffer");
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
|
||||
Helpers::panic("Incomplete framebuffer");
|
||||
}
|
||||
|
||||
// TODO: This should not clear the framebuffer contents. It should load them from VRAM.
|
||||
GLint oldViewport[4];
|
||||
glGetIntegerv(GL_VIEWPORT, oldViewport);
|
||||
OpenGL::setViewport(screenTextureWidth, screenTextureHeight);
|
||||
OpenGL::setClearColor(0.0, 0.0, 0.0, 1.0);
|
||||
gl.setClearColour(0.0, 0.0, 0.0, 1.0);
|
||||
OpenGL::clearColor();
|
||||
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
|
||||
|
||||
|
@ -368,8 +371,8 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
}
|
||||
|
||||
setupBlending();
|
||||
OpenGL::Framebuffer poop = getColourFBO();
|
||||
poop.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
auto poop = getColourBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]);
|
||||
poop->fbo.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
|
||||
const u32 depthControl = regs[PICA::InternalRegs::DepthAndColorMask];
|
||||
const bool depthWrite = regs[PICA::InternalRegs::DepthBufferWrite];
|
||||
|
@ -412,10 +415,12 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
updateLightingLUT();
|
||||
}
|
||||
|
||||
// TODO: Actually use this
|
||||
GLsizei viewportWidth = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f);
|
||||
GLsizei viewportHeight = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f);
|
||||
OpenGL::setViewport(viewportWidth, viewportHeight);
|
||||
const GLsizei viewportX = regs[PICA::InternalRegs::ViewportXY] & 0x3ff;
|
||||
const GLsizei viewportY = (regs[PICA::InternalRegs::ViewportXY] >> 16) & 0x3ff;
|
||||
const GLsizei viewportWidth = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0f);
|
||||
const GLsizei viewportHeight = GLsizei(f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0f);
|
||||
const auto rect = poop->getSubRect(colourBufferLoc, fbSize[0], fbSize[1]);
|
||||
OpenGL::setViewport(rect.left + viewportX, rect.bottom + viewportY, viewportWidth, viewportHeight);
|
||||
|
||||
const u32 stencilConfig = regs[PICA::InternalRegs::StencilTest];
|
||||
const bool stencilEnable = getBit<0>(stencilConfig);
|
||||
|
@ -450,6 +455,42 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
|
|||
|
||||
void RendererGL::display() {
|
||||
gl.disableScissor();
|
||||
gl.disableBlend();
|
||||
gl.disableDepth();
|
||||
gl.disableScissor();
|
||||
// This will work fine whether or not logic ops are enabled. We set logic op to copy instead of disabling to avoid state changes
|
||||
gl.setLogicOp(GL_COPY);
|
||||
gl.setColourMask(true, true, true, true);
|
||||
gl.useProgram(displayProgram);
|
||||
gl.bindVAO(dummyVAO);
|
||||
|
||||
gl.disableClipPlane(0);
|
||||
gl.disableClipPlane(1);
|
||||
|
||||
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
|
||||
gl.setClearColour(0.f, 0.f, 0.f, 1.f);
|
||||
OpenGL::clearColor();
|
||||
|
||||
using namespace PICA::ExternalRegs;
|
||||
const u32 topActiveFb = externalRegs[Framebuffer0Select] & 1;
|
||||
const u32 topScreenAddr = externalRegs[topActiveFb == 0 ? Framebuffer0AFirstAddr : Framebuffer0ASecondAddr];
|
||||
auto topScreen = colourBufferCache.findFromAddress(topScreenAddr);
|
||||
|
||||
if (topScreen) {
|
||||
topScreen->get().texture.bind();
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
}
|
||||
|
||||
const u32 bottomActiveFb = externalRegs[Framebuffer1Select] & 1;
|
||||
const u32 bottomScreenAddr = externalRegs[bottomActiveFb == 0 ? Framebuffer1AFirstAddr : Framebuffer1ASecondAddr];
|
||||
auto bottomScreen = colourBufferCache.findFromAddress(bottomScreenAddr);
|
||||
|
||||
if (bottomScreen) {
|
||||
bottomScreen->get().texture.bind();
|
||||
OpenGL::setViewport(40, 0, 320, 240);
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4);
|
||||
}
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
|
||||
screenFramebuffer.bind(OpenGL::ReadFramebuffer);
|
||||
|
@ -467,8 +508,9 @@ void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co
|
|||
const float b = getBits<8, 8>(value) / 255.0f;
|
||||
const float a = (value & 0xff) / 255.0f;
|
||||
color->get().fbo.bind(OpenGL::DrawFramebuffer);
|
||||
|
||||
gl.setColourMask(true, true, true, true);
|
||||
OpenGL::setClearColor(r, g, b, a);
|
||||
gl.setClearColour(r, g, b, a);
|
||||
OpenGL::clearColor();
|
||||
return;
|
||||
}
|
||||
|
@ -505,7 +547,7 @@ void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co
|
|||
|
||||
OpenGL::Framebuffer RendererGL::getColourFBO() {
|
||||
// We construct a colour buffer object and see if our cache has any matching colour buffers in it
|
||||
// If not, we allocate a texture & FBO for our framebuffer and store it in the cache
|
||||
// If not, we allocate a texture & FBO for our framebuffer and store it in the cache
|
||||
ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]);
|
||||
auto buffer = colourBufferCache.find(sampleBuffer);
|
||||
|
||||
|
@ -550,42 +592,152 @@ OpenGL::Texture RendererGL::getTexture(Texture& tex) {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: The GPU format has RGB5551 and RGB655 swapped compared to internal regs format
|
||||
PICA::ColorFmt ToColorFmt(u32 format) {
|
||||
switch (format) {
|
||||
case 2: return PICA::ColorFmt::RGB565;
|
||||
case 3: return PICA::ColorFmt::RGBA5551;
|
||||
default: return static_cast<PICA::ColorFmt>(format);
|
||||
}
|
||||
}
|
||||
|
||||
void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
const u32 inputWidth = inputSize & 0xffff;
|
||||
const u32 inputGap = inputSize >> 16;
|
||||
const u32 inputHeight = inputSize >> 16;
|
||||
const auto inputFormat = ToColorFmt(Helpers::getBits<8, 3>(flags));
|
||||
const auto outputFormat = ToColorFmt(Helpers::getBits<12, 3>(flags));
|
||||
const bool verticalFlip = flags & 1;
|
||||
const PICA::Scaling scaling = static_cast<PICA::Scaling>(Helpers::getBits<24, 2>(flags));
|
||||
|
||||
const u32 outputWidth = outputSize & 0xffff;
|
||||
const u32 outputGap = outputSize >> 16;
|
||||
u32 outputWidth = outputSize & 0xffff;
|
||||
u32 outputHeight = outputSize >> 16;
|
||||
|
||||
auto framebuffer = colourBufferCache.findFromAddress(inputAddr);
|
||||
// If there's a framebuffer at this address, use it. Otherwise go back to our old hack and display framebuffer 0
|
||||
// Displays are hard I really don't want to try implementing them because getting a fast solution is terrible
|
||||
OpenGL::Texture& tex = framebuffer.has_value() ? framebuffer.value().get().texture : colourBufferCache[0].texture;
|
||||
OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputHeight %d outputHeight %d",
|
||||
inputAddr, outputAddr, inputWidth, outputWidth, inputHeight, outputHeight);
|
||||
|
||||
tex.bind();
|
||||
screenFramebuffer.bind(OpenGL::DrawFramebuffer);
|
||||
auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, outputHeight);
|
||||
Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight);
|
||||
|
||||
gl.disableBlend();
|
||||
gl.disableLogicOp();
|
||||
gl.disableDepth();
|
||||
gl.disableScissor();
|
||||
gl.disableStencil();
|
||||
gl.setColourMask(true, true, true, true);
|
||||
gl.useProgram(displayProgram);
|
||||
gl.bindVAO(dummyVAO);
|
||||
|
||||
gl.disableClipPlane(0);
|
||||
gl.disableClipPlane(1);
|
||||
|
||||
// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
|
||||
// We consider output gap == 320 to mean bottom, and anything else to mean top
|
||||
if (outputGap == 320) {
|
||||
OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport
|
||||
} else {
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
if (verticalFlip) {
|
||||
std::swap(srcRect.bottom, srcRect.top);
|
||||
}
|
||||
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
// Apply scaling for the destination rectangle.
|
||||
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) {
|
||||
outputWidth >>= 1;
|
||||
}
|
||||
|
||||
if (scaling == PICA::Scaling::XY) {
|
||||
outputHeight >>= 1;
|
||||
}
|
||||
|
||||
auto destFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight);
|
||||
Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight);
|
||||
|
||||
if (inputWidth != outputWidth) {
|
||||
// Helpers::warn("Strided display transfer is not handled correctly!\n");
|
||||
}
|
||||
|
||||
// Blit the framebuffers
|
||||
srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer);
|
||||
destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer);
|
||||
gl.disableScissor();
|
||||
|
||||
glBlitFramebuffer(
|
||||
srcRect.left, srcRect.bottom, srcRect.right, srcRect.top, destRect.left, destRect.bottom, destRect.right, destRect.top, GL_COLOR_BUFFER_BIT,
|
||||
GL_LINEAR
|
||||
);
|
||||
}
|
||||
|
||||
// Accelerated TextureCopy, implemented as a blit between cached colour buffers.
//   inputAddr / outputAddr: addresses used to look up the source and destination surfaces
//   totalBytes: number of bytes to copy; rounded down to a multiple of 16
//   inputSize / outputSize: low 16 bits = line width, high 16 bits = gap, both in 16-byte units
//   flags: not used by this implementation
// Returns without copying when the request is empty or has a zero width, when the input and output widths
// differ, or when no cached surface contains inputAddr.
void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
	// Texture copy size is aligned to 16 byte units
	const u32 copySize = totalBytes & ~0xf;
	if (copySize == 0) {
		printf("TextureCopy total bytes less than 16!\n");
		return;
	}

	// The width and gap are provided in 16-byte units.
	const u32 inputWidth = (inputSize & 0xffff) << 4;
	const u32 inputGap = (inputSize >> 16) << 4;
	const u32 outputWidth = (outputSize & 0xffff) << 4;
	const u32 outputGap = (outputSize >> 16) << 4;

	OpenGL::DebugScope scope("TextureCopy inputAddr 0x%08X outputAddr 0x%08X totalBytes %d inputWidth %d inputGap %d outputWidth %d outputGap %d",
		inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap);

	if (inputGap != 0 || outputGap != 0) {
		// Helpers::warn("Strided texture copy\n");
	}
	if (inputWidth != outputWidth) {
		Helpers::warn("Input width does not match output width, cannot accelerate texture copy!");
		return;
	}

	// The width field comes straight from the GX command and may be zero, which would make the height
	// computation below divide by zero.
	if (inputWidth == 0) {
		Helpers::warn("Texture copy with zero input width, ignoring");
		return;
	}

	// Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine.
	// Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or
	// the width multiplied by eight (because tiles are stored linearly in memory).
	// To properly accelerate this we must examine each surface individually. For now we assume the most common case
	// of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting
	// that surface is not correct.

	// We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it
	// by eight * sizePerPixel(RGBA8) to convert it to a useable width.
	const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8);
	const u32 copyStride = (inputWidth + inputGap) / (8 * bpp);
	const u32 copyWidth = inputWidth / (8 * bpp);

	// inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region
	// in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result
	// is the number of vertical tiles so multiply that by eight to get the actual copy height.
	const u32 copyHeight = (copySize / inputWidth) * 8;

	// Find the source surface. It is never created on demand: a missing source means there is nothing to copy.
	auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false);
	if (!srcFramebuffer) {
		static int shutUpCounter = 0;  // Don't want to spam the console too much, so shut up after 5 times

		if (shutUpCounter < 5) {
			shutUpCounter++;
			printf("RendererGL::TextureCopy failed to locate src framebuffer!\n");
		}
		return;
	}

	Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight);

	// Assume the destination surface has the same format. Unless the surfaces have the same block width,
	// texture copy does not make sense.
	auto destFramebuffer = getColourBuffer(outputAddr, srcFramebuffer->format, copyWidth, copyHeight);
	Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight);

	// Blit the framebuffers
	srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer);
	destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer);
	gl.disableScissor();

	glBlitFramebuffer(
		srcRect.left, srcRect.bottom, srcRect.right, srcRect.top, destRect.left, destRect.bottom, destRect.right, destRect.top, GL_COLOR_BUFFER_BIT,
		GL_LINEAR
	);
}
|
||||
|
||||
// Returns the cached colour buffer that contains addr. On a miss, either creates and caches a new buffer
// from (addr, format, width, height) or, when createIfnotFound is false, returns std::nullopt.
std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound) {
	// Lookup by contained address. This is looser than the match done in getColourFBO, because display
	// transfers and texture copies may refer to a sub-rectangle of a surface, and a texture copy does not
	// know the surface's format.
	if (auto cached = colourBufferCache.findFromAddress(addr); cached.has_value()) {
		return cached.value().get();
	}

	// Nothing cached at this address: allocate on request, otherwise report the miss
	if (createIfnotFound) {
		ColourBuffer freshBuffer(addr, format, width, height);
		return colourBufferCache.add(freshBuffer);
	}

	return std::nullopt;
}
|
||||
|
||||
void RendererGL::screenshot(const std::string& name) {
|
||||
|
|
|
@ -9,6 +9,11 @@ void Texture::allocate() {
|
|||
texture.create(size.u(), size.v(), GL_RGBA8);
|
||||
texture.bind();
|
||||
|
||||
#ifdef GPU_DEBUG_INFO
|
||||
const auto name = Helpers::format("Surface %dx%d %s from 0x%08X", size.x(), size.y(), PICA::textureFormatToString(format), location);
|
||||
OpenGL::setObjectLabel(GL_TEXTURE, texture.handle(), name.c_str());
|
||||
#endif
|
||||
|
||||
setNewConfig(config);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "renderer_null/renderer_null.hpp"
|
||||
|
||||
RendererNull::RendererNull(GPU& gpu, const std::array<u32, regNum>& internalRegs) : Renderer(gpu, internalRegs) {}
|
||||
RendererNull::RendererNull(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
RendererNull::~RendererNull() {}
|
||||
|
||||
void RendererNull::reset() {}
|
||||
|
@ -8,5 +9,6 @@ void RendererNull::display() {}
|
|||
void RendererNull::initGraphicsContext(SDL_Window* window) {}
|
||||
void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {}
|
||||
void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {}
|
||||
void RendererNull::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {}
|
||||
void RendererNull::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {}
|
||||
void RendererNull::screenshot(const std::string& name) {}
|
||||
void RendererNull::screenshot(const std::string& name) {}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "renderer_sw/renderer_sw.hpp"
|
||||
|
||||
RendererSw::RendererSw(GPU& gpu, const std::array<u32, regNum>& internalRegs) : Renderer(gpu, internalRegs) {}
|
||||
RendererSw::RendererSw(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
RendererSw::~RendererSw() {}
|
||||
|
||||
void RendererSw::reset() { printf("RendererSW: Unimplemented reset call\n"); }
|
||||
|
@ -13,8 +14,12 @@ void RendererSw::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u
|
|||
printf("RendererSW: Unimplemented displayTransfer call\n");
|
||||
}
|
||||
|
||||
// Software renderer stub: texture copy is not implemented. All arguments are ignored and a notice is printed.
void RendererSw::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
printf("RendererSW: Unimplemented textureCopy call\n");
|
||||
}
|
||||
|
||||
// Software renderer stub: drawing is not implemented. The primitive type and vertices are ignored and a notice is printed.
void RendererSw::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {
|
||||
printf("RendererSW: Unimplemented drawVertices call\n");
|
||||
}
|
||||
|
||||
void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); }
|
||||
void RendererSw::screenshot(const std::string& name) { printf("RendererSW: Unimplemented screenshot call\n"); }
|
||||
|
|
|
@ -200,7 +200,8 @@ vk::Result RendererVK::recreateSwapchain(vk::SurfaceKHR surface, vk::Extent2D sw
|
|||
return vk::Result::eSuccess;
|
||||
}
|
||||
|
||||
RendererVK::RendererVK(GPU& gpu, const std::array<u32, regNum>& internalRegs) : Renderer(gpu, internalRegs) {}
|
||||
RendererVK::RendererVK(GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs)
|
||||
: Renderer(gpu, internalRegs, externalRegs) {}
|
||||
|
||||
RendererVK::~RendererVK() {}
|
||||
|
||||
|
@ -541,6 +542,8 @@ void RendererVK::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 co
|
|||
|
||||
void RendererVK::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {}
|
||||
|
||||
void RendererVK::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {}
|
||||
|
||||
void RendererVK::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {}
|
||||
|
||||
void RendererVK::screenshot(const std::string& name) {}
|
||||
void RendererVK::screenshot(const std::string& name) {}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "services/gsp_gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "ipc.hpp"
|
||||
#include "kernel.hpp"
|
||||
|
||||
|
@ -10,6 +11,7 @@ namespace ServiceCommands {
|
|||
RegisterInterruptRelayQueue = 0x00130042,
|
||||
WriteHwRegs = 0x00010082,
|
||||
WriteHwRegsWithMask = 0x00020084,
|
||||
SetBufferSwap = 0x00050200,
|
||||
FlushDataCache = 0x00080082,
|
||||
SetLCDForceBlack = 0x000B0040,
|
||||
TriggerCmdReqQueue = 0x000C0000,
|
||||
|
@ -44,13 +46,14 @@ void GPUService::handleSyncRequest(u32 messagePointer) {
|
|||
case ServiceCommands::FlushDataCache: flushDataCache(messagePointer); break;
|
||||
case ServiceCommands::RegisterInterruptRelayQueue: registerInterruptRelayQueue(messagePointer); break;
|
||||
case ServiceCommands::SetAxiConfigQoSMode: setAxiConfigQoSMode(messagePointer); break;
|
||||
case ServiceCommands::SetBufferSwap: setBufferSwap(messagePointer); break;
|
||||
case ServiceCommands::SetInternalPriorities: setInternalPriorities(messagePointer); break;
|
||||
case ServiceCommands::SetLCDForceBlack: setLCDForceBlack(messagePointer); break;
|
||||
case ServiceCommands::StoreDataCache: storeDataCache(messagePointer); break;
|
||||
case ServiceCommands::TriggerCmdReqQueue: [[likely]] triggerCmdReqQueue(messagePointer); break;
|
||||
case ServiceCommands::WriteHwRegs: writeHwRegs(messagePointer); break;
|
||||
case ServiceCommands::WriteHwRegsWithMask: writeHwRegsWithMask(messagePointer); break;
|
||||
; default: Helpers::panic("GPU service requested. Command: %08X\n", command);
|
||||
default: Helpers::panic("GPU service requested. Command: %08X\n", command);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,15 +127,12 @@ void GPUService::requestInterrupt(GPUInterrupt type) {
|
|||
// Not emulating this causes Yoshi's Wooly World, Captain Toad, Metroid 2 et al to hang
|
||||
if (type == GPUInterrupt::VBlank0 || type == GPUInterrupt::VBlank1) {
|
||||
int screen = static_cast<u32>(type) - static_cast<u32>(GPUInterrupt::VBlank0); // 0 for top screen, 1 for bottom
|
||||
|
||||
constexpr u32 FBInfoSize = 0x40;
|
||||
// TODO: Offset depends on GSP thread being triggered
|
||||
u8* info = &sharedMem[0x200 + screen * FBInfoSize];
|
||||
u8& dirtyFlag = info[1];
|
||||
FramebufferUpdate* update = reinterpret_cast<FramebufferUpdate*>(&sharedMem[0x200 + screen * sizeof(FramebufferUpdate)]);
|
||||
|
||||
if (dirtyFlag & 1) {
|
||||
// TODO: Submit buffer info here
|
||||
dirtyFlag &= ~1;
|
||||
if (update->dirtyFlag & 1) {
|
||||
setBufferSwapImpl(screen, update->framebufferInfo[update->index]);
|
||||
update->dirtyFlag &= ~1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -220,7 +220,7 @@ void GPUService::flushDataCache(u32 messagePointer) {
|
|||
u32 address = mem.read32(messagePointer + 4);
|
||||
u32 size = mem.read32(messagePointer + 8);
|
||||
u32 processHandle = handle = mem.read32(messagePointer + 16);
|
||||
log("GSP::GPU::FlushDataCache(address = %08X, size = %X, process = %X\n", address, size, processHandle);
|
||||
log("GSP::GPU::FlushDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle);
|
||||
|
||||
mem.write32(messagePointer, IPC::responseHeader(0x8, 1, 0));
|
||||
mem.write32(messagePointer + 4, Result::Success);
|
||||
|
@ -230,7 +230,7 @@ void GPUService::storeDataCache(u32 messagePointer) {
|
|||
u32 address = mem.read32(messagePointer + 4);
|
||||
u32 size = mem.read32(messagePointer + 8);
|
||||
u32 processHandle = handle = mem.read32(messagePointer + 16);
|
||||
log("GSP::GPU::StoreDataCache(address = %08X, size = %X, process = %X\n", address, size, processHandle);
|
||||
log("GSP::GPU::StoreDataCache(address = %08X, size = %X, process = %X)\n", address, size, processHandle);
|
||||
|
||||
mem.write32(messagePointer, IPC::responseHeader(0x1F, 1, 0));
|
||||
mem.write32(messagePointer + 4, Result::Success);
|
||||
|
@ -261,6 +261,24 @@ void GPUService::setAxiConfigQoSMode(u32 messagePointer) {
|
|||
mem.write32(messagePointer + 4, Result::Success);
|
||||
}
|
||||
|
||||
// IPC handler for GSP::GPU::SetBufferSwap.
// Reads a screen ID and a framebuffer description out of the request, applies them via setBufferSwapImpl
// and writes a success response back into the message buffer.
void GPUService::setBufferSwap(u32 messagePointer) {
	// Reads the n-th 32-bit word of the IPC message
	const auto word = [&](u32 n) -> u32 { return mem.read32(messagePointer + n * 4); };

	const u32 screenId = word(1);  // Selects either PDC0 or PDC1

	// Words 2 to 7 carry the framebuffer description; attribute is not part of the message and stays zeroed
	FramebufferInfo info{};
	info.activeFb = word(2);
	info.leftFramebufferVaddr = word(3);
	info.rightFramebufferVaddr = word(4);
	info.stride = word(5);
	info.format = word(6);
	info.displayFb = word(7);  // Selects either framebuffer A or B

	log("GSP::GPU::SetBufferSwap\n");
	// This path has not been exercised yet, so flag it loudly before applying the swap
	Helpers::panic("Untested GSP::GPU::SetBufferSwap call");

	setBufferSwapImpl(screenId, info);

	mem.write32(messagePointer, IPC::responseHeader(0x05, 1, 0));
	mem.write32(messagePointer + 4, Result::Success);
}
|
||||
|
||||
// Seems to also be completely undocumented
|
||||
void GPUService::setInternalPriorities(u32 messagePointer) {
|
||||
log("GSP::GPU::SetInternalPriorities\n");
|
||||
|
@ -283,7 +301,7 @@ void GPUService::processCommandBuffer() {
|
|||
log("Processing %d GPU commands\n", commandsLeft);
|
||||
|
||||
while (commandsLeft != 0) {
|
||||
u32 cmdID = cmd[0] & 0xff;
|
||||
const u32 cmdID = cmd[0] & 0xff;
|
||||
switch (cmdID) {
|
||||
case GXCommands::ProcessCommandList: processCommandList(cmd); break;
|
||||
case GXCommands::MemoryFill: memoryFill(cmd); break;
|
||||
|
@ -375,12 +393,47 @@ void GPUService::flushCacheRegions(u32* cmd) {
|
|||
log("GSP::GPU::FlushCacheRegions (Stubbed)\n");
|
||||
}
|
||||
|
||||
// Applies a framebuffer swap by writing the given description into the external GPU registers.
//   screenId: 0 or 1, selects which screen's register group is written
//   info: framebuffer addresses (virtual), stride, format and A/B selection to program
// Requests whose screenId or info.activeFb is greater than 1 are ignored with a warning.
void GPUService::setBufferSwapImpl(u32 screenId, const FramebufferInfo& info) {
	using namespace PICA::ExternalRegs;

	// Both values can originate from the guest (IPC message or shared memory) and are used to index the
	// fixed-size tables below, so reject anything that would read past them.
	if (screenId > 1 || info.activeFb > 1) {
		Helpers::warn("GSP::GPU::SetBufferSwap: screen ID or active framebuffer out of range, ignoring\n");
		return;
	}

	// Address registers, laid out as [activeFb][screen][A, B]
	static constexpr std::array<u32, 8> fbAddresses = {
		Framebuffer0AFirstAddr,
		Framebuffer0BFirstAddr,
		Framebuffer1AFirstAddr,
		Framebuffer1BFirstAddr,
		Framebuffer0ASecondAddr,
		Framebuffer0BSecondAddr,
		Framebuffer1ASecondAddr,
		Framebuffer1BSecondAddr,
	};

	auto& regs = gpu.getExtRegisters();

	// The left framebuffer goes to the "A" register, the right one to the "B" register right after it
	const u32 fbIndex = info.activeFb * 4 + screenId * 2;
	regs[fbAddresses[fbIndex]] = VaddrToPaddr(info.leftFramebufferVaddr);
	regs[fbAddresses[fbIndex + 1]] = VaddrToPaddr(info.rightFramebufferVaddr);

	// Per-screen configuration registers, laid out as [screen][config, select, stride]
	static constexpr std::array<u32, 6> configAddresses = {
		Framebuffer0Config,
		Framebuffer0Select,
		Framebuffer0Stride,
		Framebuffer1Config,
		Framebuffer1Select,
		Framebuffer1Stride,
	};

	const u32 configIndex = screenId * 3;
	regs[configAddresses[configIndex]] = info.format;
	regs[configAddresses[configIndex + 1]] = info.displayFb;
	regs[configAddresses[configIndex + 2]] = info.stride;
}
|
||||
|
||||
// Actually send command list (aka display list) to GPU
|
||||
void GPUService::processCommandList(u32* cmd) {
|
||||
const u32 address = cmd[1] & ~7; // Buffer address
|
||||
const u32 size = cmd[2] & ~3; // Buffer size in bytes
|
||||
const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
|
||||
const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
|
||||
[[maybe_unused]] const bool updateGas = cmd[3] == 1; // Update gas additive blend results (0 = don't update, 1 = update)
|
||||
[[maybe_unused]] const bool flushBuffer = cmd[7] == 1; // Flush buffer (0 = don't flush, 1 = flush)
|
||||
|
||||
log("GPU::GSP::processCommandList. Address: %08X, size in bytes: %08X\n", address, size);
|
||||
gpu.startCommandList(address, size);
|
||||
|
@ -390,7 +443,15 @@ void GPUService::processCommandList(u32* cmd) {
|
|||
// TODO: Emulate the transfer engine & its registers
|
||||
// Then this can be emulated by just writing the appropriate values there
|
||||
void GPUService::triggerTextureCopy(u32* cmd) {
|
||||
Helpers::warn("GSP::GPU::TriggerTextureCopy (unimplemented)\n");
|
||||
const u32 inputAddr = VaddrToPaddr(cmd[1]);
|
||||
const u32 outputAddr = VaddrToPaddr(cmd[2]);
|
||||
const u32 totalBytes = cmd[3];
|
||||
const u32 inputSize = cmd[4];
|
||||
const u32 outputSize = cmd[5];
|
||||
const u32 flags = cmd[6];
|
||||
|
||||
log("GSP::GPU::TriggerTextureCopy (Stubbed)\n");
|
||||
gpu.textureCopy(inputAddr, outputAddr, totalBytes, inputSize, outputSize, flags);
|
||||
// This uses the transfer engine and thus needs to fire a PPF interrupt.
|
||||
// NSMB2 relies on this
|
||||
requestInterrupt(GPUInterrupt::PPF);
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
|
||||
// Initializes the gpu and regs members from the corresponding arguments.
Renderer::Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs)
	: gpu(gpu), regs(internalRegs) {
}
|
||||
// Initializes the gpu, regs and externalRegs members from the corresponding arguments.
Renderer::Renderer(
	GPU& gpu, const std::array<u32, regNum>& internalRegs, const std::array<u32, extRegNum>& externalRegs
)
	: gpu(gpu),
	  regs(internalRegs),
	  externalRegs(externalRegs) {
}
|
||||
// Out-of-line destructor with no work of its own.
Renderer::~Renderer() = default;
|
||||
|
||||
std::optional<RendererType> Renderer::typeFromString(std::string inString) {
|
||||
|
|
|
@ -687,11 +687,10 @@ namespace OpenGL {
|
|||
// Builds a rectangle from its x/y position and its width/height.
Rectangle(T x, T y, T width, T height)
	: x(x),
	  y(y),
	  width(width),
	  height(height) {
}
|
||||
|
||||
bool isEmpty() const { return width == 0 && height == 0; }
|
||||
bool isLine() const { return (width == 0 && height != 0) || (width != 0 && height == 0); }
|
||||
|
||||
// Resets the position and both dimensions to zero, so isEmpty() returns true afterwards.
void setEmpty() {
	height = 0;
	width = 0;
	y = 0;
	x = 0;
}
|
||||
};
|
||||
|
||||
// Shorthand for a Rectangle whose coordinates and dimensions are GLuint values.
using Rect = Rectangle<GLuint>;
|
||||
|
||||
} // end namespace OpenGL
|
||||
} // end namespace OpenGL
|
Loading…
Add table
Reference in a new issue