mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-22 05:15:51 +12:00
gpu: Implement basic texcopy
* Improves rendering in FE:A but the screens will still not show because it requires surface validation
This commit is contained in:
parent
297afd20d7
commit
82d9511993
15 changed files with 139 additions and 16 deletions
|
@ -599,12 +599,11 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u
|
|||
u32 outputWidth = outputSize & 0xffff;
|
||||
u32 outputHeight = outputSize >> 16;
|
||||
|
||||
if (inputWidth != outputWidth) {
|
||||
// Helpers::warn("Strided display transfer is not handled correctly!\n");
|
||||
}
|
||||
OpenGL::DebugScope scope("DisplayTransfer inputAddr 0x%08X outputAddr 0x%08X inputWidth %d outputWidth %d inputWidth %d outputHeight %d",
|
||||
inputAddr, outputAddr, inputWidth, outputWidth, inputHeight, outputHeight);
|
||||
|
||||
auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, inputHeight);
|
||||
Math::Rect<u32> srcRect = srcFramebuffer.getSubRect(inputAddr, outputWidth, outputHeight);
|
||||
auto srcFramebuffer = getColourBuffer(inputAddr, inputFormat, inputWidth, outputHeight);
|
||||
Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, outputWidth, outputHeight);
|
||||
|
||||
// Apply scaling for the destination rectangle.
|
||||
if (scaling == PICA::Scaling::X || scaling == PICA::Scaling::XY) {
|
||||
|
@ -616,18 +615,88 @@ void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u
|
|||
}
|
||||
|
||||
auto destFramebuffer = getColourBuffer(outputAddr, outputFormat, outputWidth, outputHeight);
|
||||
Math::Rect<u32> destRect = destFramebuffer.getSubRect(outputAddr, outputWidth, outputHeight);
|
||||
Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, outputWidth, outputHeight);
|
||||
|
||||
if (inputWidth != outputWidth) {
|
||||
// Helpers::warn("Strided display transfer is not handled correctly!\n");
|
||||
}
|
||||
|
||||
// Blit the framebuffers
|
||||
srcFramebuffer.fbo.bind(OpenGL::ReadFramebuffer);
|
||||
destFramebuffer.fbo.bind(OpenGL::DrawFramebuffer);
|
||||
srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer);
|
||||
destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer);
|
||||
glBlitFramebuffer(
|
||||
srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT,
|
||||
GL_LINEAR
|
||||
);
|
||||
}
|
||||
|
||||
ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height) {
|
||||
// Accelerates a PICA texture copy by blitting between cached colour buffers.
//
// inputAddr / outputAddr: addresses of the source and destination data.
// totalBytes: number of bytes to copy; rounded down to a multiple of 16.
// inputSize / outputSize: low 16 bits hold the line width, high 16 bits hold the gap
//                         between lines, both expressed in 16-byte units.
// flags: currently unused by this implementation.
//
// The copy is silently abandoned (after logging) when it cannot be expressed as a blit:
// nothing to copy, mismatching input/output widths, a degenerate copy region, or no
// already-cached surface containing inputAddr.
void RendererGL::textureCopy(u32 inputAddr, u32 outputAddr, u32 totalBytes, u32 inputSize, u32 outputSize, u32 flags) {
	// Texture copy size is aligned to 16 byte units
	const u32 copySize = totalBytes & ~0xf;
	if (copySize == 0) {
		printf("TextureCopy total bytes less than 16!\n");
		return;
	}

	// The width and gap are provided in 16-byte units.
	const u32 inputWidth = (inputSize & 0xffff) << 4;
	const u32 inputGap = (inputSize >> 16) << 4;
	const u32 outputWidth = (outputSize & 0xffff) << 4;
	const u32 outputGap = (outputSize >> 16) << 4;

	OpenGL::DebugScope scope("TextureCopy inputAddr 0x%08X outputAddr 0x%08X totalBytes %d inputWidth %d inputGap %d outputWidth %d outputGap %d",
							 inputAddr, outputAddr, totalBytes, inputWidth, inputGap, outputWidth, outputGap);

	if (inputGap != 0 || outputGap != 0) {
		Helpers::warn("Strided texture copy\n");
	}
	if (inputWidth != outputWidth) {
		Helpers::warn("Input width does not match output width, cannot accelerate texture copy!\n");
		return;
	}

	// A zero width would make the height computation below divide by zero, and there
	// would be nothing to copy anyway.
	if (inputWidth == 0) {
		Helpers::warn("Texture copy with zero input width, ignoring!\n");
		return;
	}

	// Texture copy is a raw data copy in PICA, which means no format or tiling information is provided to the engine.
	// Depending if the target surface is linear or tiled, games set inputWidth to either the width of the texture or
	// the width multiplied by eight (because tiles are stored linearly in memory).
	// To properly accelerate this we must examine each surface individually. For now we assume the most common case
	// of tiled surface with RGBA8 format. If our assumption does not hold true, we abort the texture copy as inserting
	// that surface is not correct.

	// We assume the source surface is tiled and RGBA8. inputWidth is in bytes so divide it
	// by eight * sizePerPixel(RGBA8) to convert it to a useable width.
	const u32 bpp = sizePerPixel(PICA::ColorFmt::RGBA8);
	const u32 copyStride = (inputWidth + inputGap) / (8 * bpp);
	const u32 copyWidth = inputWidth / (8 * bpp);

	// inputHeight/outputHeight are typically set to zero so they cannot be used to get the height of the copy region
	// in contrast to display transfer. Compute height manually by dividing the copy size with the copy width. The result
	// is the number of vertical tiles so multiply that by eight to get the actual copy height.
	const u32 copyHeight = (copySize / inputWidth) * 8;

	// A region with no width or height has nothing to blit. Bail out before the destination
	// lookup below, which would otherwise create and cache a zero-sized colour buffer.
	if (copyWidth == 0 || copyHeight == 0) {
		Helpers::warn("Texture copy region is empty, ignoring!\n");
		return;
	}

	// Find the source surface. Do not create one if it is missing: we only accelerate
	// copies out of surfaces that are already cached.
	auto srcFramebuffer = getColourBuffer(inputAddr, PICA::ColorFmt::RGBA8, copyStride, copyHeight, false);
	if (!srcFramebuffer) {
		printf("TextureCopy failed to locate src framebuffer!\n");
		return;
	}

	Math::Rect<u32> srcRect = srcFramebuffer->getSubRect(inputAddr, copyWidth, copyHeight);

	// Assume the destination surface has the same format. Unless the surfaces have the same block width,
	// texture copy does not make sense.
	auto destFramebuffer = getColourBuffer(outputAddr, srcFramebuffer->format, copyWidth, copyHeight);
	if (!destFramebuffer) {
		// getColourBuffer returns an optional; never dereference it unchecked.
		printf("TextureCopy failed to locate dest framebuffer!\n");
		return;
	}

	Math::Rect<u32> destRect = destFramebuffer->getSubRect(outputAddr, copyWidth, copyHeight);

	// Blit the framebuffers
	srcFramebuffer->fbo.bind(OpenGL::ReadFramebuffer);
	destFramebuffer->fbo.bind(OpenGL::DrawFramebuffer);
	glBlitFramebuffer(
		srcRect.left, srcRect.top, srcRect.right, srcRect.bottom, destRect.left, destRect.top, destRect.right, destRect.bottom, GL_COLOR_BUFFER_BIT,
		GL_LINEAR
	);
}
|
||||
|
||||
std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 width, u32 height, bool createIfnotFound) {
|
||||
// Try to find an already existing buffer that contains the provided address
|
||||
// This is a more relaxed check compared to getColourFBO as display transfer/texcopy may refer to
|
||||
// subrect of a surface and in case of texcopy we don't know the format of the surface.
|
||||
|
@ -636,6 +705,10 @@ ColourBuffer RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt format, u32 wi
|
|||
return buffer.value().get();
|
||||
}
|
||||
|
||||
if (!createIfnotFound) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Otherwise create and cache a new buffer.
|
||||
ColourBuffer sampleBuffer(addr, format, width, height);
|
||||
return colourBufferCache.add(sampleBuffer);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue