mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-16 18:59:48 +12:00
add: texture cache
This commit is contained in:
parent
8d94cce537
commit
bac3a8e040
6 changed files with 338 additions and 2 deletions
|
@ -407,6 +407,7 @@ if(ENABLE_METAL AND APPLE)
|
||||||
|
|
||||||
set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
|
set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp
|
||||||
src/core/renderer_mtl/renderer_mtl.cpp
|
src/core/renderer_mtl/renderer_mtl.cpp
|
||||||
|
src/core/renderer_mtl/texture.cpp
|
||||||
src/host_shaders/metal_shaders.metal
|
src/host_shaders/metal_shaders.metal
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -19,8 +19,6 @@ template <typename SurfaceType, size_t capacity, bool evictOnOverflow = false>
|
||||||
class SurfaceCache {
|
class SurfaceCache {
|
||||||
// Vanilla std::optional can't hold actual references
|
// Vanilla std::optional can't hold actual references
|
||||||
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
using OptionalRef = std::optional<std::reference_wrapper<SurfaceType>>;
|
||||||
static_assert(std::is_same<SurfaceType, ColourBuffer>() || std::is_same<SurfaceType, DepthBuffer>() ||
|
|
||||||
std::is_same<SurfaceType, Texture>(), "Invalid surface type");
|
|
||||||
|
|
||||||
size_t size;
|
size_t size;
|
||||||
size_t evictionIndex;
|
size_t evictionIndex;
|
||||||
|
|
|
@ -2,6 +2,9 @@
|
||||||
#include <QuartzCore/QuartzCore.hpp>
|
#include <QuartzCore/QuartzCore.hpp>
|
||||||
|
|
||||||
#include "renderer.hpp"
|
#include "renderer.hpp"
|
||||||
|
#include "texture.hpp"
|
||||||
|
// HACK: use the OpenGL cache
|
||||||
|
#include "../renderer_gl/surface_cache.hpp"
|
||||||
|
|
||||||
class GPU;
|
class GPU;
|
||||||
|
|
||||||
|
@ -30,6 +33,9 @@ class RendererMTL final : public Renderer {
|
||||||
MTL::Device* device;
|
MTL::Device* device;
|
||||||
MTL::CommandQueue* commandQueue;
|
MTL::CommandQueue* commandQueue;
|
||||||
|
|
||||||
|
// Caches
|
||||||
|
SurfaceCache<Metal::Texture, 256, true> textureCache;
|
||||||
|
|
||||||
// HACK
|
// HACK
|
||||||
MTL::Texture* topScreenTexture;
|
MTL::Texture* topScreenTexture;
|
||||||
|
|
||||||
|
|
72
include/renderer_mtl/texture.hpp
Normal file
72
include/renderer_mtl/texture.hpp
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
#pragma once
|
||||||
|
#include <array>
|
||||||
|
#include <string>
|
||||||
|
#include <Metal/Metal.hpp>
|
||||||
|
#include "PICA/regs.hpp"
|
||||||
|
#include "boost/icl/interval.hpp"
|
||||||
|
#include "helpers.hpp"
|
||||||
|
#include "math_util.hpp"
|
||||||
|
#include "opengl.hpp"
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using Interval = boost::icl::right_open_interval<T>;
|
||||||
|
|
||||||
|
namespace Metal {
|
||||||
|
|
||||||
|
struct Texture {
|
||||||
|
MTL::Device* device;
|
||||||
|
|
||||||
|
u32 location;
|
||||||
|
u32 config; // Magnification/minification filter, wrapping configs, etc
|
||||||
|
PICA::TextureFmt format;
|
||||||
|
OpenGL::uvec2 size;
|
||||||
|
bool valid;
|
||||||
|
|
||||||
|
// Range of VRAM taken up by buffer
|
||||||
|
Interval<u32> range;
|
||||||
|
|
||||||
|
MTL::Texture* texture = nullptr;
|
||||||
|
|
||||||
|
Texture() : valid(false) {}
|
||||||
|
|
||||||
|
Texture(MTL::Device* dev, u32 loc, PICA::TextureFmt format, u32 x, u32 y, u32 config, bool valid = true)
|
||||||
|
: device(dev), location(loc), format(format), size({x, y}), config(config), valid(valid) {
|
||||||
|
|
||||||
|
u64 endLoc = (u64)loc + sizeInBytes();
|
||||||
|
// Check if start and end are valid here
|
||||||
|
range = Interval<u32>(loc, (u32)endLoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For 2 textures to "match" we only care about their locations, formats, and dimensions to match
|
||||||
|
// For other things, such as filtering mode, etc, we can just switch the attributes of the cached texture
|
||||||
|
bool matches(Texture& other) {
|
||||||
|
return location == other.location && format == other.format &&
|
||||||
|
size.x() == other.size.x() && size.y() == other.size.y();
|
||||||
|
}
|
||||||
|
|
||||||
|
void allocate();
|
||||||
|
void setNewConfig(u32 newConfig);
|
||||||
|
void decodeTexture(std::span<const u8> data);
|
||||||
|
void free();
|
||||||
|
u64 sizeInBytes();
|
||||||
|
|
||||||
|
u32 decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data);
|
||||||
|
|
||||||
|
// Get the morton interleave offset of a texel based on its U and V values
|
||||||
|
static u32 mortonInterleave(u32 u, u32 v);
|
||||||
|
// Get the byte offset of texel (u, v) in the texture
|
||||||
|
static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel);
|
||||||
|
static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width);
|
||||||
|
|
||||||
|
// Returns the format of this texture as a string
|
||||||
|
std::string_view formatToString() {
|
||||||
|
return PICA::textureFormatToString(format);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the texel at coordinates (u, v) of an ETC1(A4) texture
|
||||||
|
// TODO: Make hasAlpha a template parameter
|
||||||
|
u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data);
|
||||||
|
u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Metal
|
|
@ -17,6 +17,8 @@ RendererMTL::RendererMTL(GPU& gpu, const std::array<u32, regNum>& internalRegs,
|
||||||
RendererMTL::~RendererMTL() {}
|
RendererMTL::~RendererMTL() {}
|
||||||
|
|
||||||
void RendererMTL::reset() {
|
void RendererMTL::reset() {
|
||||||
|
textureCache.reset();
|
||||||
|
|
||||||
// TODO: implement
|
// TODO: implement
|
||||||
Helpers::warn("RendererMTL::reset not implemented");
|
Helpers::warn("RendererMTL::reset not implemented");
|
||||||
}
|
}
|
||||||
|
@ -219,6 +221,8 @@ void RendererMTL::screenshot(const std::string& name) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererMTL::deinitGraphicsContext() {
|
void RendererMTL::deinitGraphicsContext() {
|
||||||
|
textureCache.reset();
|
||||||
|
|
||||||
// TODO: implement
|
// TODO: implement
|
||||||
Helpers::warn("RendererMTL::deinitGraphicsContext not implemented");
|
Helpers::warn("RendererMTL::deinitGraphicsContext not implemented");
|
||||||
}
|
}
|
||||||
|
|
255
src/core/renderer_mtl/texture.cpp
Normal file
255
src/core/renderer_mtl/texture.cpp
Normal file
|
@ -0,0 +1,255 @@
|
||||||
|
#include "renderer_mtl/texture.hpp"
|
||||||
|
#include "colour.hpp"
|
||||||
|
#include <array>
|
||||||
|
|
||||||
|
using namespace Helpers;
|
||||||
|
|
||||||
|
namespace Metal {
|
||||||
|
|
||||||
|
// Creates the backing Metal texture: a 2D RGBA8 texture with the dimensions stored in `size`,
// then applies the currently stored sampler configuration.
void Texture::allocate() {
	MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::alloc()->init();
	descriptor->setTextureType(MTL::TextureType2D);
	descriptor->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
	descriptor->setWidth(size.u());
	descriptor->setHeight(size.v());
	descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite);
	descriptor->setStorageMode(MTL::StorageModeShared);  // TODO: use private + staging buffers?
	texture = device->newTexture(descriptor);

	// The descriptor came from alloc()/init(), so we own it and must release it ourselves.
	// It is only needed while the texture is being created.
	descriptor->release();

	setNewConfig(config);
}
|
||||||
|
|
||||||
|
// Set the texture's configuration, which includes min/mag filters, wrapping S/T modes, and so on
|
||||||
|
void Texture::setNewConfig(u32 cfg) {
|
||||||
|
config = cfg;
|
||||||
|
|
||||||
|
// TODO: implement this
|
||||||
|
}
|
||||||
|
|
||||||
|
void Texture::free() {
|
||||||
|
valid = false;
|
||||||
|
|
||||||
|
if (texture) {
|
||||||
|
texture->release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns how many bytes the texture data occupies in its PICA format.
// Panics if the format is not one of the known texture formats.
u64 Texture::sizeInBytes() {
	using Fmt = PICA::TextureFmt;

	const u64 texels = u64(size.x()) * u64(size.y());

	switch (format) {
		// 4 bytes per texel
		case Fmt::RGBA8: return texels * 4;

		// 3 bytes per texel
		case Fmt::RGB8: return texels * 3;

		// 2 bytes per texel
		case Fmt::RGBA5551:
		case Fmt::RGB565:
		case Fmt::RGBA4:
		case Fmt::RG8:
		case Fmt::IA8: return texels * 2;

		// 1 byte per texel
		case Fmt::A8:
		case Fmt::I8:
		case Fmt::IA4: return texels;

		// 4 bits per texel
		case Fmt::I4:
		case Fmt::A4: return texels / 2;

		// Compressed formats are made of 4x4 tiles: 8 bytes per tile for ETC1, 16 bytes for ETC1A4
		case Fmt::ETC1: return (texels / 16) * 8;
		case Fmt::ETC1A4: return (texels / 16) * 16;

		default: Helpers::panic("[PICA] Attempted to get size of invalid texture type");
	}
}
|
||||||
|
|
||||||
|
// u and v are the UVs of the relevant texel
|
||||||
|
// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here
|
||||||
|
// https://en.wikipedia.org/wiki/Z-order_curve
|
||||||
|
// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel
|
||||||
|
// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8
|
||||||
|
// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg
|
||||||
|
u32 Texture::mortonInterleave(u32 u, u32 v) {
|
||||||
|
static constexpr u32 xOffsets[] = { 0, 1, 4, 5, 16, 17, 20, 21 };
|
||||||
|
static constexpr u32 yOffsets[] = { 0, 2, 8, 10, 32, 34, 40, 42 };
|
||||||
|
|
||||||
|
return xOffsets[u & 7] + yOffsets[v & 7];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the byte offset of texel (u, v) in the texture
|
||||||
|
u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) {
|
||||||
|
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
|
||||||
|
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
|
||||||
|
|
||||||
|
return offset * bytesPerPixel;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte
|
||||||
|
u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
|
||||||
|
u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to
|
||||||
|
offset += mortonInterleave(u, v); // Add the in-tile offset of the texel
|
||||||
|
|
||||||
|
return offset / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the texel at position (u, v)
|
||||||
|
// fmt: format of the texture
|
||||||
|
// data: texture data of the texture
|
||||||
|
u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
|
||||||
|
switch (fmt) {
|
||||||
|
case PICA::TextureFmt::RGBA4: {
|
||||||
|
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||||
|
u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||||
|
|
||||||
|
u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
|
||||||
|
u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
|
||||||
|
u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel));
|
||||||
|
u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel));
|
||||||
|
|
||||||
|
return (alpha << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::RGBA5551: {
|
||||||
|
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||||
|
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||||
|
|
||||||
|
u8 alpha = getBit<0>(texel) ? 0xff : 0;
|
||||||
|
u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
|
||||||
|
u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel));
|
||||||
|
u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
|
||||||
|
|
||||||
|
return (alpha << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::RGB565: {
|
||||||
|
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||||
|
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||||
|
|
||||||
|
const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
|
||||||
|
const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
|
||||||
|
const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
|
||||||
|
|
||||||
|
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::RG8: {
|
||||||
|
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||||
|
constexpr u8 b = 0;
|
||||||
|
const u8 g = data[offset];
|
||||||
|
const u8 r = data[offset + 1];
|
||||||
|
|
||||||
|
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::RGB8: {
|
||||||
|
const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
|
||||||
|
const u8 b = data[offset];
|
||||||
|
const u8 g = data[offset + 1];
|
||||||
|
const u8 r = data[offset + 2];
|
||||||
|
|
||||||
|
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::RGBA8: {
|
||||||
|
const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
|
||||||
|
const u8 alpha = data[offset];
|
||||||
|
const u8 b = data[offset + 1];
|
||||||
|
const u8 g = data[offset + 2];
|
||||||
|
const u8 r = data[offset + 3];
|
||||||
|
|
||||||
|
return (alpha << 24) | (b << 16) | (g << 8) | r;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::IA4: {
|
||||||
|
const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||||
|
const u8 texel = data[offset];
|
||||||
|
const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
|
||||||
|
const u8 intensity = Colour::convert4To8Bit(texel >> 4);
|
||||||
|
|
||||||
|
// Intensity formats just copy the intensity value to every colour channel
|
||||||
|
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::A4: {
|
||||||
|
const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||||
|
|
||||||
|
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||||
|
u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
|
||||||
|
alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
|
||||||
|
|
||||||
|
// A8 sets RGB to 0
|
||||||
|
return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::A8: {
|
||||||
|
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||||
|
const u8 alpha = data[offset];
|
||||||
|
|
||||||
|
// A8 sets RGB to 0
|
||||||
|
return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::I4: {
|
||||||
|
u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||||
|
|
||||||
|
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||||
|
u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
|
||||||
|
intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
|
||||||
|
|
||||||
|
// Intensity formats just copy the intensity value to every colour channel
|
||||||
|
return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::I8: {
|
||||||
|
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||||
|
const u8 intensity = data[offset];
|
||||||
|
|
||||||
|
// Intensity formats just copy the intensity value to every colour channel
|
||||||
|
return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::IA8: {
|
||||||
|
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||||
|
|
||||||
|
// Same as I8 except each pixel gets its own alpha value too
|
||||||
|
const u8 alpha = data[offset];
|
||||||
|
const u8 intensity = data[offset + 1];
|
||||||
|
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PICA::TextureFmt::ETC1: return getTexelETC(false, u, v, size.u(), data);
|
||||||
|
case PICA::TextureFmt::ETC1A4: return getTexelETC(true, u, v, size.u(), data);
|
||||||
|
|
||||||
|
default:
|
||||||
|
Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast<int>(fmt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the raw PICA texture in `data` into 32-bit texels and uploads them to the backing Metal texture.
// data: raw (swizzled) texture data, in the format stored in `format`
void Texture::decodeTexture(std::span<const u8> data) {
	const u32 width = size.u();
	const u32 height = size.v();

	std::vector<u32> decoded;
	decoded.reserve(u64(width) * u64(height));

	// Decode texels line by line
	for (u32 v = 0; v < height; v++) {
		for (u32 u = 0; u < width; u++) {
			decoded.push_back(decodeTexel(u, v, format, data));
		}
	}

	// The upload buffer always holds one decoded u32 per texel, whatever the source PICA format is,
	// so the row stride has to come from the decoded texel size and not from sizeInBytes()
	const u64 bytesPerRow = u64(width) * sizeof(u32);
	texture->replaceRegion(MTL::Region(0, 0, width, height), 0, 0, decoded.data(), bytesPerRow, 0);
}
|
||||||
|
|
||||||
|
} // namespace Metal
|
Loading…
Add table
Reference in a new issue