diff --git a/CMakeLists.txt b/CMakeLists.txt index 2362ffb2..fa33dede 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,7 +420,7 @@ set(HEADER_FILES include/emulator.hpp include/helpers.hpp include/termcolor.hpp if(IOS) set(SOURCE_FILES ${SOURCE_FILES} src/miniaudio/miniaudio.m) target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS=1") - + if (IOS_SIMULATOR_BUILD) target_compile_definitions(AlberCore PUBLIC "PANDA3DS_IOS_SIMULATOR=1") endif() @@ -604,14 +604,16 @@ if(ENABLE_METAL AND APPLE) include/renderer_mtl/mtl_common.hpp include/renderer_mtl/pica_to_mtl.hpp include/renderer_mtl/objc_helper.hpp + include/renderer_mtl/texture_decoder.hpp ) set(RENDERER_MTL_SOURCE_FILES src/core/renderer_mtl/metal_cpp_impl.cpp src/core/renderer_mtl/renderer_mtl.cpp src/core/renderer_mtl/mtl_texture.cpp - src/core/renderer_mtl/mtl_etc1.cpp src/core/renderer_mtl/mtl_lut_texture.cpp + src/core/renderer_mtl/pica_to_mtl.cpp src/core/renderer_mtl/objc_helper.mm + src/core/renderer_mtl/texture_decoder.cpp src/host_shaders/metal_shaders.metal src/host_shaders/metal_blit.metal #src/host_shaders/metal_copy_to_lut_texture.metal @@ -808,7 +810,7 @@ if(NOT BUILD_HYDRA_CORE AND NOT BUILD_LIBRETRO_CORE) elseif(BUILD_HYDRA_CORE) target_compile_definitions(AlberCore PRIVATE PANDA3DS_HYDRA_CORE=1) include_directories(third_party/hydra_core/include) - + set(SHARED_SOURCE_FILES src/hydra_core.cpp) if(IOS) set(SHARED_SOURCE_FILES ${SHARED_SOURCE_FILES} src/ios_driver.mm) diff --git a/include/renderer_mtl/mtl_texture.hpp b/include/renderer_mtl/mtl_texture.hpp index 93103091..0e78b13c 100644 --- a/include/renderer_mtl/mtl_texture.hpp +++ b/include/renderer_mtl/mtl_texture.hpp @@ -8,8 +8,9 @@ #include "boost/icl/interval.hpp" #include "helpers.hpp" #include "math_util.hpp" -#include "opengl.hpp" #include "renderer_mtl/pica_to_mtl.hpp" +// TODO: remove dependency on OpenGL +#include "opengl.hpp" template using Interval = boost::icl::right_open_interval; @@ -52,22 +53,19 @@ namespace Metal { void free(); u64 sizeInBytes(); - u8 decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u16 decodeTexelU16(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - u32 decodeTexelU32(u32 u, u32 v, PICA::TextureFmt fmt, std::span data); - - // Get the morton interleave offset of a texel based on its U and V values - static u32 mortonInterleave(u32 u, u32 v); - // Get the byte offset of texel (u, v) in the texture - static u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel); - static u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width); + u8 decodeTexelBGR8ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelA1BGR5ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelB5G6R5ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelABGR4ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelAI8ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelI8ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelAI4ToRGBA4(u32 u, u32 v, std::span data); + u8 decodeTexelAI4ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelI4ToRGBA4(u32 u, u32 v, std::span data); + u8 decodeTexelI4ToRGBA8(u32 u, u32 v, std::span data); + u8 decodeTexelA4ToA8(u32 u, u32 v, std::span data); // Returns the format of this texture as a string std::string_view formatToString() { return PICA::textureFormatToString(format); } - - // Returns the texel at coordinates (u, v) of an ETC1(A4) texture - // TODO: Make hasAlpha a template parameter - u32 getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data); - u32 decodeETC(u32 alpha, u32 u, u32 v, u64 colourData); }; } // namespace Metal diff --git a/include/renderer_mtl/pica_to_mtl.hpp b/include/renderer_mtl/pica_to_mtl.hpp index beb63b17..a0874a65 100644 --- a/include/renderer_mtl/pica_to_mtl.hpp +++ b/include/renderer_mtl/pica_to_mtl.hpp @@ -3,49 +3,21 @@ #include #include "PICA/regs.hpp" +// TODO: remove dependency on OpenGL +#include "opengl.hpp" namespace PICA { struct PixelFormatInfo { MTL::PixelFormat pixelFormat; size_t bytesPerTexel; + void (*decoder)(OpenGL::uvec2, u32, u32, std::span, std::vector&); + bool needsSwizzle{false}; + // TODO: swizzle }; -// iOS, at least on simulator, doesn't support a lot of more "exotic" texture formats, so we avoid them tehre -#ifndef PANDA3DS_IOS - constexpr PixelFormatInfo pixelFormatInfos[14] = { - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 - {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 - {MTL::PixelFormatB5G6R5Unorm, 2}, // RGB565 - {MTL::PixelFormatABGR4Unorm, 2}, // RGBA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 - {MTL::PixelFormatRG8Unorm, 2}, // RG8 - {MTL::PixelFormatRGBA8Unorm, 4}, // I8 - {MTL::PixelFormatA8Unorm, 1}, // A8 - {MTL::PixelFormatABGR4Unorm, 2}, // IA4 - {MTL::PixelFormatABGR4Unorm, 2}, // I4 - {MTL::PixelFormatA8Unorm, 1}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 - }; -#else - constexpr PixelFormatInfo pixelFormatInfos[14] = { - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA8 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGB8 - {MTL::PixelFormatBGR5A1Unorm, 2}, // RGBA5551 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGB565 - {MTL::PixelFormatRGBA8Unorm, 4}, // RGBA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA8 - {MTL::PixelFormatRG8Unorm, 2}, // RG8 - {MTL::PixelFormatRGBA8Unorm, 4}, // I8 - {MTL::PixelFormatA8Unorm, 1}, // A8 - {MTL::PixelFormatRGBA8Unorm, 4}, // IA4 - {MTL::PixelFormatRGBA8Unorm, 4}, // I4 - {MTL::PixelFormatA8Unorm, 1}, // A4 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1 - {MTL::PixelFormatRGBA8Unorm, 4}, // ETC1A4 - }; -#endif + extern PixelFormatInfo pixelFormatInfos[14]; + + void checkForPixelFormatSupport(MTL::Device* device); inline PixelFormatInfo getPixelFormatInfo(TextureFmt format) { return pixelFormatInfos[static_cast(format)]; } diff --git a/include/renderer_mtl/texture_decoder.hpp b/include/renderer_mtl/texture_decoder.hpp new file mode 100644 index 00000000..29f88695 --- /dev/null +++ b/include/renderer_mtl/texture_decoder.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include "helpers.hpp" +// TODO: remove dependency on OpenGL +#include "opengl.hpp" + +void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); +void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData); diff --git a/src/core/renderer_mtl/mtl_etc1.cpp b/src/core/renderer_mtl/mtl_etc1.cpp deleted file mode 100644 index 420a60ca..00000000 --- a/src/core/renderer_mtl/mtl_etc1.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include - -#include "colour.hpp" -#include "renderer_mtl/mtl_texture.hpp" -#include "renderer_mtl/renderer_mtl.hpp" - - -using namespace Helpers; - -namespace Metal { - static constexpr u32 signExtend3To32(u32 val) { - return (u32)(s32(val) << 29 >> 29); - } - - u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span data) { - // Pixel offset of the 8x8 tile based on u, v and the width of the texture - u32 offs = ((u & ~7) * 8) + ((v & ~7) * width); - if (!hasAlpha) { - offs >>= 1; - } - - // In-tile offsets for u/v - u &= 7; - v &= 7; - - // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles - // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes - const u32 subTileSize = hasAlpha ? 16 : 8; - const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? - - // In-subtile offsets for u/v - u &= 3; - v &= 3; - offs += subTileSize * subTileIndex; - - u32 alpha; - const u64* ptr = reinterpret_cast(data.data() + offs); // Cast to u64* - - if (hasAlpha) { - // First 64 bits of the 4x4 subtile are alpha data - const u64 alphaData = *ptr++; - alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); - } else { - alpha = 0xff; // ETC1 without alpha uses ff for every pixel - } - - // Next 64 bits of the subtile are colour data - u64 colourData = *ptr; - return decodeETC(alpha, u, v, colourData); - } - - u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) { - static constexpr u32 modifiers[8][2] = { - {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, - }; - - // Parse colour data for 4x4 block - const u32 subindices = getBits<0, 16, u32>(colourData); - const u32 negationFlags = getBits<16, 16, u32>(colourData); - const bool flip = getBit<32>(colourData); - const bool diffMode = getBit<33>(colourData); - - // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits - const u32 tableIndex1 = getBits<37, 3, u32>(colourData); - const u32 tableIndex2 = getBits<34, 3, u32>(colourData); - const u32 texelIndex = u * 4 + v; // Index of the texel in the block - - if (flip) std::swap(u, v); - - s32 r, g, b; - if (diffMode) { - r = getBits<59, 5, s32>(colourData); - g = getBits<51, 5, s32>(colourData); - b = getBits<43, 5, s32>(colourData); - - if (u >= 2) { - r += signExtend3To32(getBits<56, 3, u32>(colourData)); - g += signExtend3To32(getBits<48, 3, u32>(colourData)); - b += signExtend3To32(getBits<40, 3, u32>(colourData)); - } - - // Expand from 5 to 8 bits per channel - r = Colour::convert5To8Bit(r); - g = Colour::convert5To8Bit(g); - b = Colour::convert5To8Bit(b); - } else { - if (u < 2) { - r = getBits<60, 4, s32>(colourData); - g = getBits<52, 4, s32>(colourData); - b = getBits<44, 4, s32>(colourData); - } else { - r = getBits<56, 4, s32>(colourData); - g = getBits<48, 4, s32>(colourData); - b = getBits<40, 4, s32>(colourData); - } - - // Expand from 4 to 8 bits per channel - r = Colour::convert4To8Bit(r); - g = Colour::convert4To8Bit(g); - b = Colour::convert4To8Bit(b); - } - - const u32 index = (u < 2) ? tableIndex1 : tableIndex2; - s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; - - if (((negationFlags >> texelIndex) & 1) != 0) { - modifier = -modifier; - } - - r = std::clamp(r + modifier, 0, 255); - g = std::clamp(g + modifier, 0, 255); - b = std::clamp(b + modifier, 0, 255); - - return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r); - } -} // namespace Metal diff --git a/src/core/renderer_mtl/mtl_texture.cpp b/src/core/renderer_mtl/mtl_texture.cpp index a110b88f..3b4e065d 100644 --- a/src/core/renderer_mtl/mtl_texture.cpp +++ b/src/core/renderer_mtl/mtl_texture.cpp @@ -99,35 +99,7 @@ namespace Metal { } } - // u and v are the UVs of the relevant texel - // Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here - // https://en.wikipedia.org/wiki/Z-order_curve - // Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel - // The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 - // As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg - u32 Texture::mortonInterleave(u32 u, u32 v) { - static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; - static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; - - return xOffsets[u & 7] + yOffsets[v & 7]; - } - - // Get the byte offset of texel (u, v) in the texture - u32 Texture::getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - - return offset * bytesPerPixel; - } - - // Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte - u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { - u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to - offset += mortonInterleave(u, v); // Add the in-tile offset of the texel - - return offset / 2; - } - + /* u8 Texture::decodeTexelU8(u32 u, u32 v, PICA::TextureFmt fmt, std::span data) { switch (fmt) { case PICA::TextureFmt::A4: { @@ -331,32 +303,18 @@ namespace Metal { default: Helpers::panic("[Texture::DecodeTexel] Unimplemented format = %d", static_cast(fmt)); } } + */ void Texture::decodeTexture(std::span data) { std::vector decoded; decoded.reserve(u64(size.u()) * u64(size.v()) * formatInfo.bytesPerTexel); - // Decode texels line by line - for (u32 v = 0; v < size.v(); v++) { - for (u32 u = 0; u < size.u(); u++) { - if (formatInfo.bytesPerTexel == 1) { - u8 texel = decodeTexelU8(u, v, format, data); - decoded.push_back(texel); - } else if (formatInfo.bytesPerTexel == 2) { - u16 texel = decodeTexelU16(u, v, format, data); - decoded.push_back((texel & 0x00ff) >> 0); - decoded.push_back((texel & 0xff00) >> 8); - } else if (formatInfo.bytesPerTexel == 4) { - u32 texel = decodeTexelU32(u, v, format, data); - decoded.push_back((texel & 0x000000ff) >> 0); - decoded.push_back((texel & 0x0000ff00) >> 8); - decoded.push_back((texel & 0x00ff0000) >> 16); - decoded.push_back((texel & 0xff000000) >> 24); - } else { - Helpers::panic("[Texture::decodeTexture] Unimplemented bytesPerTexel (%u)", formatInfo.bytesPerTexel); - } - } - } + // Decode texels line by line + for (u32 v = 0; v < size.v(); v++) { + for (u32 u = 0; u < size.u(); u++) { + formatInfo.decoder(size, u, v, data, decoded); + } + } texture->replaceRegion(MTL::Region(0, 0, size.u(), size.v()), 0, 0, decoded.data(), formatInfo.bytesPerTexel * size.u(), 0); } diff --git a/src/core/renderer_mtl/pica_to_mtl.cpp b/src/core/renderer_mtl/pica_to_mtl.cpp new file mode 100644 index 00000000..d527f000 --- /dev/null +++ b/src/core/renderer_mtl/pica_to_mtl.cpp @@ -0,0 +1,33 @@ +#include "renderer_mtl/pica_to_mtl.hpp" + +#include "renderer_mtl/texture_decoder.hpp" + +using namespace Helpers; + +namespace PICA { + + PixelFormatInfo pixelFormatInfos[14] = { + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelABGR8ToRGBA8}, // RGBA8 + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelBGR8ToRGBA8}, // RGB8 + {MTL::PixelFormatBGR5A1Unorm, 2, decodeTexelA1BGR5ToBGR5A1}, // RGBA5551 + {MTL::PixelFormatB5G6R5Unorm, 2, decodeTexelB5G6R5ToB5G6R5}, // RGB565 + {MTL::PixelFormatABGR4Unorm, 2, decodeTexelABGR4ToABGR4}, // RGBA4 + {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI8ToRG8}, // IA8 + {MTL::PixelFormatRG8Unorm, 2, decodeTexelGR8ToRG8}, // RG8 + {MTL::PixelFormatR8Unorm, 1, decodeTexelI8ToR8}, // I8 + {MTL::PixelFormatA8Unorm, 1, decodeTexelA8ToA8}, // A8 + {MTL::PixelFormatRG8Unorm, 2, decodeTexelAI4ToRG8}, // IA4 + {MTL::PixelFormatR8Unorm, 1, decodeTexelI4ToR8}, // I4 + {MTL::PixelFormatA8Unorm, 1, decodeTexelA4ToA8}, // A4 + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1ToRGBA8}, // ETC1 + {MTL::PixelFormatRGBA8Unorm, 4, decodeTexelETC1A4ToRGBA8}, // ETC1A4 + }; + + void checkForPixelFormatSupport(MTL::Device* device) { + if (!device->supportsFamily(MTL::GPUFamilyApple1)) { + // TODO + throw; + } + } + +} diff --git a/src/core/renderer_mtl/renderer_mtl.cpp b/src/core/renderer_mtl/renderer_mtl.cpp index 308ad715..1719eaf3 100644 --- a/src/core/renderer_mtl/renderer_mtl.cpp +++ b/src/core/renderer_mtl/renderer_mtl.cpp @@ -141,9 +141,9 @@ void RendererMTL::initGraphicsContext(SDL_Window* window) { device = MTL::CreateSystemDefaultDevice(); metalLayer->setDevice(device); #endif + checkForPixelFormatSupport(device); commandQueue = device->newCommandQueue(); - printf("C++ device pointer: %p\n", device); // Textures MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); diff --git a/src/core/renderer_mtl/texture_decoder.cpp b/src/core/renderer_mtl/texture_decoder.cpp new file mode 100644 index 00000000..d98eb06b --- /dev/null +++ b/src/core/renderer_mtl/texture_decoder.cpp @@ -0,0 +1,326 @@ +#include "renderer_mtl/texture_decoder.hpp" + +#include +#include + +#include "math_util.hpp" +#include "colour.hpp" + +using namespace Helpers; + +// u and v are the UVs of the relevant texel +// Texture data is stored interleaved in Morton order, ie in a Z - order curve as shown here +// https://en.wikipedia.org/wiki/Z-order_curve +// Textures are split into 8x8 tiles.This function returns the in - tile offset depending on the u & v of the texel +// The in - tile offset is the sum of 2 offsets, one depending on the value of u % 8 and the other on the value of y % 8 +// As documented in this picture https ://en.wikipedia.org/wiki/File:Moser%E2%80%93de_Bruijn_addition.svg +u32 mortonInterleave(u32 u, u32 v) { + static constexpr u32 xOffsets[] = {0, 1, 4, 5, 16, 17, 20, 21}; + static constexpr u32 yOffsets[] = {0, 2, 8, 10, 32, 34, 40, 42}; + + return xOffsets[u & 7] + yOffsets[v & 7]; +} + +// Get the byte offset of texel (u, v) in the texture +u32 getSwizzledOffset(u32 u, u32 v, u32 width, u32 bytesPerPixel) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset * bytesPerPixel; +} + +// Same as the above code except we need to divide by 2 because 4 bits is smaller than a byte +u32 getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) { + u32 offset = ((u & ~7) * 8) + ((v & ~7) * width); // Offset of the 8x8 tile the texel belongs to + offset += mortonInterleave(u, v); // Add the in-tile offset of the texel + + return offset / 2; +} + +void decodeTexelABGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 4); + const u8 alpha = inData[offset]; + const u8 b = inData[offset + 1]; + const u8 g = inData[offset + 2]; + const u8 r = inData[offset + 3]; + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(alpha); +} + +void decodeTexelBGR8ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 3); + const u8 b = inData[offset]; + const u8 g = inData[offset + 1]; + const u8 r = inData[offset + 2]; + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(0xff); +} + +void decodeTexelA1BGR5ToBGR5A1(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel); + u8 b = getBits<1, 5, u8>(texel); + u8 g = getBits<6, 5, u8>(texel); + u8 r = getBits<11, 5, u8>(texel); + + u16 outTexel = (alpha << 15) | (r << 10) | (g << 5) | b; + outData.push_back(outTexel & 0xff); + outData.push_back((outTexel >> 8) & 0xff); +} + +void decodeTexelA1BGR5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + u8 alpha = getBit<0>(texel) ? 0xff : 0; + u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel)); + u8 g = Colour::convert5To8Bit(getBits<6, 5, u8>(texel)); + u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel)); + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(alpha); +} + +void decodeTexelB5G6R5ToB5G6R5(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + outData.push_back(texel & 0xff); + outData.push_back((texel >> 8) & 0xff); +} + +void decodeTexelB5G6R5ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 2); + const u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel)); + const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel)); + const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel)); + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(0xff); +} + +void decodeTexelABGR4ToABGR4(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + u8 alpha = getBits<0, 4, u8>(texel); + u8 b = getBits<4, 4, u8>(texel); + u8 g = getBits<8, 4, u8>(texel); + u8 r = getBits<12, 4, u8>(texel); + + outData.push_back((b << 4) | alpha); + outData.push_back((r << 4) | g); +} + +void decodeTexelABGR4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + u16 texel = u16(inData[offset]) | (u16(inData[offset + 1]) << 8); + + u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel)); + u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel)); + u8 g = Colour::convert4To8Bit(getBits<8, 4, u8>(texel)); + u8 r = Colour::convert4To8Bit(getBits<12, 4, u8>(texel)); + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(alpha); +} + +void decodeTexelAI8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + + // Same as I8 except each pixel gets its own alpha value too + const u8 alpha = inData[offset]; + const u8 intensity = inData[offset + 1]; + + outData.push_back(intensity); + outData.push_back(alpha); +} + +void decodeTexelGR8ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 2); + constexpr u8 b = 0; + const u8 g = inData[offset]; + const u8 r = inData[offset + 1]; + + outData.push_back(r); + outData.push_back(g); +} + +void decodeTexelI8ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 intensity = inData[offset]; + + outData.push_back(intensity); +} + +void decodeTexelA8ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 alpha = inData[offset]; + + outData.push_back(alpha); +} + +void decodeTexelAI4ToRG8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset(u, v, size.u(), 1); + const u8 texel = inData[offset]; + const u8 alpha = Colour::convert4To8Bit(texel & 0xf); + const u8 intensity = Colour::convert4To8Bit(texel >> 4); + + outData.push_back(intensity); + outData.push_back(alpha); +} + +void decodeTexelI4ToR8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 intensity = inData[offset] >> ((u % 2) ? 4 : 0); + intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity)); + + outData.push_back(intensity); +} + +void decodeTexelA4ToA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + const u32 offset = getSwizzledOffset_4bpp(u, v, size.u()); + + // For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates + u8 alpha = inData[offset] >> ((u % 2) ? 4 : 0); + alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha)); + + outData.push_back(alpha); +} + +static constexpr u32 signExtend3To32(u32 val) { + return (u32)(s32(val) << 29 >> 29); +} + +void decodeETC(u32 u, u32 v, u64 colourData, u32 alpha, std::vector& outData) { + static constexpr u32 modifiers[8][2] = { + {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}, + }; + + // Parse colour data for 4x4 block + const u32 subindices = getBits<0, 16, u32>(colourData); + const u32 negationFlags = getBits<16, 16, u32>(colourData); + const bool flip = getBit<32>(colourData); + const bool diffMode = getBit<33>(colourData); + + // Note: index1 is indeed stored on the higher bits, with index2 in the lower bits + const u32 tableIndex1 = getBits<37, 3, u32>(colourData); + const u32 tableIndex2 = getBits<34, 3, u32>(colourData); + const u32 texelIndex = u * 4 + v; // Index of the texel in the block + + if (flip) std::swap(u, v); + + s32 r, g, b; + if (diffMode) { + r = getBits<59, 5, s32>(colourData); + g = getBits<51, 5, s32>(colourData); + b = getBits<43, 5, s32>(colourData); + + if (u >= 2) { + r += signExtend3To32(getBits<56, 3, u32>(colourData)); + g += signExtend3To32(getBits<48, 3, u32>(colourData)); + b += signExtend3To32(getBits<40, 3, u32>(colourData)); + } + + // Expand from 5 to 8 bits per channel + r = Colour::convert5To8Bit(r); + g = Colour::convert5To8Bit(g); + b = Colour::convert5To8Bit(b); + } else { + if (u < 2) { + r = getBits<60, 4, s32>(colourData); + g = getBits<52, 4, s32>(colourData); + b = getBits<44, 4, s32>(colourData); + } else { + r = getBits<56, 4, s32>(colourData); + g = getBits<48, 4, s32>(colourData); + b = getBits<40, 4, s32>(colourData); + } + + // Expand from 4 to 8 bits per channel + r = Colour::convert4To8Bit(r); + g = Colour::convert4To8Bit(g); + b = Colour::convert4To8Bit(b); + } + + const u32 index = (u < 2) ? tableIndex1 : tableIndex2; + s32 modifier = modifiers[index][(subindices >> texelIndex) & 1]; + + if (((negationFlags >> texelIndex) & 1) != 0) { + modifier = -modifier; + } + + r = std::clamp(r + modifier, 0, 255); + g = std::clamp(g + modifier, 0, 255); + b = std::clamp(b + modifier, 0, 255); + + outData.push_back(r); + outData.push_back(g); + outData.push_back(b); + outData.push_back(alpha); +} + +template +void getTexelETC(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + // Pixel offset of the 8x8 tile based on u, v and the width of the texture + u32 offs = ((u & ~7) * 8) + ((v & ~7) * size.u()); + if (!hasAlpha) { + offs >>= 1; + } + + // In-tile offsets for u/v + u &= 7; + v &= 7; + + // ETC1(A4) also subdivide the 8x8 tile to 4 4x4 tiles + // Each tile is 8 bytes for ETC1, but since ETC1A4 has 4 alpha bits per pixel, that becomes 16 bytes + const u32 subTileSize = hasAlpha ? 16 : 8; + const u32 subTileIndex = (u / 4) + 2 * (v / 4); // Which of the 4 subtiles is this texel in? + + // In-subtile offsets for u/v + u &= 3; + v &= 3; + offs += subTileSize * subTileIndex; + + u32 alpha; + const u64* ptr = reinterpret_cast(inData.data() + offs); // Cast to u64* + + if (hasAlpha) { + // First 64 bits of the 4x4 subtile are alpha data + const u64 alphaData = *ptr++; + alpha = Colour::convert4To8Bit((alphaData >> (4 * (u * 4 + v))) & 0xf); + } else { + alpha = 0xff; // ETC1 without alpha uses ff for every pixel + } + + // Next 64 bits of the subtile are colour data + u64 colourData = *ptr; + + decodeETC(u, v, colourData, alpha, outData); +} + +void decodeTexelETC1ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + getTexelETC(size, u, v, inData, outData); +} + +void decodeTexelETC1A4ToRGBA8(OpenGL::uvec2 size, u32 u, u32 v, std::span inData, std::vector& outData) { + getTexelETC(size, u, v, inData, outData); +} diff --git a/third_party/LuaJIT b/third_party/LuaJIT index 8bf7686d..41edf095 160000 --- a/third_party/LuaJIT +++ b/third_party/LuaJIT @@ -1 +1 @@ -Subproject commit 8bf7686d820f868eae1a522c481fee09c18c90b9 +Subproject commit 41edf0959b9504d36dd85f5f16893c004ea7d7ba diff --git a/third_party/oaknut b/third_party/oaknut index 790374d7..94c726ce 160000 --- a/third_party/oaknut +++ b/third_party/oaknut @@ -1 +1 @@ -Subproject commit 790374d7e66257b1f8ed89d798e5dcfb5363af05 +Subproject commit 94c726ce0338b054eb8cb5ea91de8fe6c19f4392