mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-07 22:55:40 +12:00
* Renderer: Add prepareForDraw callback * Add fmt submodule and port shader decompiler instructions to it * Add shader acceleration setting * Hook up vertex shaders to shader cache * Shader decompiler: Fix redundant compilations * Shader Decompiler: Fix vertex attribute upload * Shader compiler: Simplify generated code for reading and faster compilation * Further simplify shader decompiler output * Shader decompiler: More smallen-ing * Shader decompiler: Get PICA uniforms uploaded to the GPU * Shader decompiler: Readd clipping * Shader decompiler: Actually `break` on control flow instructions * Shader decompiler: More control flow handling * Shader decompiler: Fix desitnation mask * Shader Decomp: Remove pair member capture in lambda (unsupported on NDK) * Disgusting changes to handle the fact that hw shader shaders are 2x as big * Shader decompiler: Implement proper output semantic mapping * Moar instructions * Shader decompiler: Add FLR/SLT/SLTI/SGE/SGEI * Shader decompiler: Add register indexing * Shader decompiler: Optimize mova with both x and y masked * Shader decompiler: Add DPH/DPHI * Fix shader caching being broken * PICA decompiler: Cache VS uniforms * Simply vertex cache code * Simplify vertex cache code * Shader decompiler: Add loops * Shader decompiler: Implement safe multiplication * Shader decompiler: Implement LG2/EX2 * Shader decompiler: More control flow * Shader decompiler: Fix JMPU condition * Shader decompiler: Convert main function to void * PICA: Start implementing GPU vertex fetch * More hw VAO work * More hw VAO work * More GPU vertex fetch code * Add GL Stream Buffer from Duckstation * GL: Actually upload data to stream buffers * GPU: Cleanup immediate mode handling * Get first renders working with accelerated draws * Shader decompiler: Fix control flow analysis bugs * HW shaders: Accelerate indexed draws * Shader decompiler: Add support for compilation errors * GLSL decompiler: Fall back for LITP * Add Renderdoc scope classes * Fix 
control flow analysis bug * HW shaders: Fix attribute fetch * Rewriting hw vertex fetch * Stream buffer: Fix copy-paste mistake * HW shaders: Fix indexed rendering * HW shaders: Add padding attributes * HW shaders: Avoid redundant glVertexAttrib4f calls * HW shaders: Fix loops * HW shaders: Make generated shaders slightly smaller * Fix libretro build * HW shaders: Fix android * Remove redundant ubershader checks * Set accelerate shader default to true * Shader decompiler: Don't declare VS input attributes as an array * Change ubuntu-latest to Ubuntu 24.04 because Microsoft screwed up their CI again * fix merge conflict bug
99 lines
No EOL
2.6 KiB
C++
99 lines
No EOL
2.6 KiB
C++
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
|
|
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
|
|
|
|
#pragma once
|
|
|
|
#include <cstdlib>
|
|
|
|
#include "helpers.hpp"
|
|
|
|
#ifdef _MSC_VER
|
|
#include <malloc.h>
|
|
#endif
|
|
|
|
namespace Common {
|
|
// Reports whether `value` is an exact multiple of `alignment`.
// Uses a modulo, so `alignment` does not have to be a power of two, but it must be non-zero.
template <typename T>
constexpr bool isAligned(T value, unsigned int alignment) {
	const T remainder = value % static_cast<T>(alignment);
	return remainder == 0;
}
|
|
|
|
// Rounds `value` up to the nearest multiple of `alignment`.
// Works for any non-zero alignment (division based); values that are already aligned are returned unchanged.
template <typename T>
constexpr T alignUp(T value, unsigned int alignment) {
	const T step = static_cast<T>(alignment);
	const T bias = static_cast<T>(alignment - 1);
	// Adding (alignment - 1) before the truncating division pushes any unaligned value into the next bucket.
	return (value + bias) / step * step;
}
|
|
|
|
// Rounds `value` down to the nearest multiple of `alignment`.
// Works for any non-zero alignment (division based).
template <typename T>
constexpr T alignDown(T value, unsigned int alignment) {
	const T step = static_cast<T>(alignment);
	// The truncating division drops the remainder; multiplying back yields the aligned value.
	return (value / step) * step;
}
|
|
|
|
// Reports whether `value` is a multiple of `alignment` using a bit mask instead of a modulo.
// The mask trick only gives the right answer when `alignment` is a power of two.
template <typename T>
constexpr bool isAlignedPow2(T value, unsigned int alignment) {
	const T lowBits = static_cast<T>(alignment - 1);
	return (value & lowBits) == 0;
}
|
|
|
|
// Rounds `value` up to the nearest multiple of `alignment` using bit masking.
// Only valid when `alignment` is a power of two.
template <typename T>
constexpr T alignUpPow2(T value, unsigned int alignment) {
	const T lowBits = static_cast<T>(alignment - 1);
	const T keepMask = static_cast<T>(~lowBits);
	// Bump past the next boundary, then clear the bits below the alignment.
	return (value + lowBits) & keepMask;
}
|
|
|
|
// Rounds `value` down to the nearest multiple of `alignment` using bit masking.
// Only valid when `alignment` is a power of two.
template <typename T>
constexpr T alignDownPow2(T value, unsigned int alignment) {
	const T lowBits = static_cast<T>(alignment - 1);
	const T keepMask = static_cast<T>(~lowBits);
	// Clearing the bits below the alignment snaps the value to the previous boundary.
	return value & keepMask;
}
|
|
|
|
// Reports whether `value` has at most one bit set.
// Note that this means 0 is also reported as a power of two.
template <typename T>
constexpr bool isPow2(T value) {
	// Subtracting one flips the lowest set bit and everything below it, so the AND
	// clears that bit; nothing is left over only if it was the sole bit set.
	const auto withoutLowestBit = value & (value - 1);
	return withoutLowestBit == 0;
}
|
|
|
|
// Returns the largest power of two that is less than or equal to `value`.
// Returns 0 when `value` is 0. Intended for unsigned integer types.
template <typename T>
constexpr T previousPow2(T value) {
	if (value == static_cast<T>(0)) return 0;

	// Smear the highest set bit into every lower position, so value becomes 0b0..011..1.
	value |= (value >> 1);
	value |= (value >> 2);
	value |= (value >> 4);
	// sizeof() counts bytes, not bits: the wider shifts are needed for types of 2, 4 and 8+ bytes respectively.
	if constexpr (sizeof(T) >= 2) value |= (value >> 8);
	if constexpr (sizeof(T) >= 4) value |= (value >> 16);
	if constexpr (sizeof(T) >= 8) value |= (value >> 32);

	// Dropping everything below the top bit leaves only the highest power of two.
	return value - (value >> 1);
}
|
|
|
|
// Returns the smallest power of two that is greater than or equal to `value`.
// Returns 0 when `value` is 0. Intended for unsigned integer types; if the result does not
// fit in T the final increment overflows.
template <typename T>
constexpr T nextPow2(T value) {
	// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
	if (value == static_cast<T>(0)) return 0;

	// Decrement first so that exact powers of two map to themselves.
	value--;
	// Smear the highest set bit into every lower position, so value becomes 0b0..011..1.
	value |= (value >> 1);
	value |= (value >> 2);
	value |= (value >> 4);
	// sizeof() counts bytes, not bits: the wider shifts are needed for types of 2, 4 and 8+ bytes respectively.
	if constexpr (sizeof(T) >= 2) value |= (value >> 8);
	if constexpr (sizeof(T) >= 4) value |= (value >> 16);
	if constexpr (sizeof(T) >= 8) value |= (value >> 32);
	// All-ones plus one carries into the next power of two.
	value++;
	return value;
}
|
|
|
|
// Allocates `size` bytes aligned to `alignment` bytes.
// Uses _aligned_malloc on MSVC and posix_memalign everywhere else.
// Returns nullptr on the posix_memalign path if the allocation fails.
ALWAYS_INLINE static void* alignedMalloc(size_t size, size_t alignment) {
#if defined(_MSC_VER)
	return _aligned_malloc(size, alignment);
#else
#if defined(__APPLE__)
	// Unaligned sizes are slow on macOS, so pad the request up to a multiple of the alignment.
	if (isPow2(alignment)) {
		const size_t lowBits = alignment - 1;
		size = (size + lowBits) & ~lowBits;
	}
#endif
	void* block = nullptr;
	if (posix_memalign(&block, alignment, size) != 0) {
		return nullptr;
	}
	return block;
#endif
}
|
|
|
|
// Counterpart of alignedMalloc: releases `ptr` with the deallocation routine that matches
// the platform's allocator (free for posix_memalign, _aligned_free for _aligned_malloc).
ALWAYS_INLINE static void alignedFree(void* ptr) {
#ifndef _MSC_VER
	free(ptr);
#else
	_aligned_free(ptr);
#endif
}
|
|
} // namespace Common
|