mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-06-03 12:27:21 +12:00
Moar shader decompiler (#559)
* Renderer: Add prepareForDraw callback * Add fmt submodule and port shader decompiler instructions to it * Add shader acceleration setting * Hook up vertex shaders to shader cache * Shader decompiler: Fix redundant compilations * Shader Decompiler: Fix vertex attribute upload * Shader compiler: Simplify generated code for reading and faster compilation * Further simplify shader decompiler output * Shader decompiler: More smallen-ing * Shader decompiler: Get PICA uniforms uploaded to the GPU * Shader decompiler: Readd clipping * Shader decompiler: Actually `break` on control flow instructions * Shader decompiler: More control flow handling * Shader decompiler: Fix desitnation mask * Shader Decomp: Remove pair member capture in lambda (unsupported on NDK) * Disgusting changes to handle the fact that hw shader shaders are 2x as big * Shader decompiler: Implement proper output semantic mapping * Moar instructions * Shader decompiler: Add FLR/SLT/SLTI/SGE/SGEI * Shader decompiler: Add register indexing * Shader decompiler: Optimize mova with both x and y masked * Shader decompiler: Add DPH/DPHI * Fix shader caching being broken * PICA decompiler: Cache VS uniforms * Simply vertex cache code * Simplify vertex cache code * Shader decompiler: Add loops * Shader decompiler: Implement safe multiplication * Shader decompiler: Implement LG2/EX2 * Shader decompiler: More control flow * Shader decompiler: Fix JMPU condition * Shader decompiler: Convert main function to void * PICA: Start implementing GPU vertex fetch * More hw VAO work * More hw VAO work * More GPU vertex fetch code * Add GL Stream Buffer from Duckstation * GL: Actually upload data to stream buffers * GPU: Cleanup immediate mode handling * Get first renders working with accelerated draws * Shader decompiler: Fix control flow analysis bugs * HW shaders: Accelerate indexed draws * Shader decompiler: Add support for compilation errors * GLSL decompiler: Fall back for LITP * Add Renderdoc scope classes * Fix 
control flow analysis bug * HW shaders: Fix attribute fetch * Rewriting hw vertex fetch * Stream buffer: Fix copy-paste mistake * HW shaders: Fix indexed rendering * HW shaders: Add padding attributes * HW shaders: Avoid redundant glVertexAttrib4f calls * HW shaders: Fix loops * HW shaders: Make generated shaders slightly smaller * Fix libretro build * HW shaders: Fix android * Remove redundant ubershader checks * Set accelerate shader default to true * Shader decompiler: Don't declare VS input attributes as an array * Change ubuntu-latest to Ubuntu 24.04 because Microsoft screwed up their CI again * fix merge conflict bug
This commit is contained in:
parent
afaf18f124
commit
49a94a13c5
34 changed files with 1877 additions and 253 deletions
45
include/PICA/draw_acceleration.hpp
Normal file
45
include/PICA/draw_acceleration.hpp
Normal file
|
@ -0,0 +1,45 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
struct DrawAcceleration {
|
||||
static constexpr u32 maxAttribCount = 16;
|
||||
static constexpr u32 maxLoaderCount = 12;
|
||||
|
||||
struct AttributeInfo {
|
||||
u32 offset;
|
||||
u32 stride;
|
||||
|
||||
u8 type;
|
||||
u8 componentCount;
|
||||
|
||||
std::array<float, 4> fixedValue; // For fixed attributes
|
||||
};
|
||||
|
||||
struct Loader {
|
||||
// Data to upload for this loader
|
||||
u8* data;
|
||||
usize size;
|
||||
};
|
||||
|
||||
u8* indexBuffer;
|
||||
|
||||
// Minimum and maximum index in the index buffer for a draw call
|
||||
u16 minimumIndex, maximumIndex;
|
||||
u32 totalAttribCount;
|
||||
u32 totalLoaderCount;
|
||||
u32 enabledAttributeMask;
|
||||
u32 fixedAttributes;
|
||||
u32 vertexDataSize;
|
||||
|
||||
std::array<AttributeInfo, maxAttribCount> attributeInfo;
|
||||
std::array<Loader, maxLoaderCount> loaders;
|
||||
|
||||
bool canBeAccelerated;
|
||||
bool indexed;
|
||||
bool useShortIndices;
|
||||
};
|
||||
} // namespace PICA
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
#include "PICA/draw_acceleration.hpp"
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
|
@ -13,6 +14,12 @@
|
|||
#include "memory.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
// The ways a shader can be executed
enum class ShaderExecMode {
	Interpreter,  // Interpret shaders on the CPU
	JIT,          // Recompile shaders to CPU machine code
	Hardware,     // Recompile shaders to host shaders and run them on the GPU
};
|
||||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
static constexpr u32 extRegNum = 0x1000;
|
||||
|
@ -45,7 +52,7 @@ class GPU {
|
|||
uint immediateModeVertIndex;
|
||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||
|
||||
template <bool indexed, bool useShaderJIT>
|
||||
template <bool indexed, ShaderExecMode mode>
|
||||
void drawArrays();
|
||||
|
||||
// Silly method of avoiding linking problems. TODO: Change to something less silly
|
||||
|
@ -81,6 +88,7 @@ class GPU {
|
|||
std::unique_ptr<Renderer> renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
||||
void getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed);
|
||||
public:
|
||||
// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
|
||||
// Encoded in PICA native format
|
||||
|
|
57
include/PICA/pica_vert_config.hpp
Normal file
57
include/PICA/pica_vert_config.hpp
Normal file
|
@ -0,0 +1,57 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "bitfield.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
namespace PICA {
|
||||
// Configuration struct used
|
||||
struct VertConfig {
|
||||
PICAHash::HashType shaderHash;
|
||||
PICAHash::HashType opdescHash;
|
||||
u32 entrypoint;
|
||||
|
||||
// PICA registers for configuring shader output->fragment semantic mapping
|
||||
std::array<u32, 7> outmaps{};
|
||||
u16 outputMask;
|
||||
u8 outputCount;
|
||||
bool usingUbershader;
|
||||
|
||||
// Pad to 56 bytes so that the compiler won't insert unnecessary padding, which in turn will affect our unordered_map lookup
|
||||
// As the padding will get hashed and memcmp'd...
|
||||
u32 pad{};
|
||||
|
||||
bool operator==(const VertConfig& config) const {
|
||||
// Hash function and equality operator required by std::unordered_map
|
||||
return std::memcmp(this, &config, sizeof(VertConfig)) == 0;
|
||||
}
|
||||
|
||||
VertConfig(PICAShader& shader, const std::array<u32, 0x300>& regs, bool usingUbershader) : usingUbershader(usingUbershader) {
|
||||
shaderHash = shader.getCodeHash();
|
||||
opdescHash = shader.getOpdescHash();
|
||||
entrypoint = shader.entrypoint;
|
||||
|
||||
outputCount = regs[PICA::InternalRegs::ShaderOutputCount] & 7;
|
||||
outputMask = regs[PICA::InternalRegs::VertexShaderOutputMask];
|
||||
for (int i = 0; i < outputCount; i++) {
|
||||
// Mask out unused bits
|
||||
outmaps[i] = regs[PICA::InternalRegs::ShaderOutmap0 + i] & 0x1F1F1F1F;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace PICA
|
||||
|
||||
static_assert(sizeof(PICA::VertConfig) == 56);
|
||||
|
||||
// Override std::hash for our vertex config class
|
||||
template <>
|
||||
struct std::hash<PICA::VertConfig> {
|
||||
std::size_t operator()(const PICA::VertConfig& config) const noexcept { return PICAHash::computeHash((const char*)&config, sizeof(config)); }
|
||||
};
|
|
@ -107,6 +107,11 @@ class PICAShader {
|
|||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> outputs;
|
||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
protected:
|
||||
std::array<u32, 128> operandDescriptors;
|
||||
|
@ -125,14 +130,13 @@ class PICAShader {
|
|||
std::array<CallInfo, 4> callInfo;
|
||||
ShaderType type;
|
||||
|
||||
// We use a hashmap for matching 3DS shaders to their equivalent compiled code in our shader cache in the shader JIT
|
||||
// We choose our hash type to be a 64-bit integer by default, as the collision chance is very tiny and generating it is decently optimal
|
||||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||
|
||||
public:
|
||||
bool uniformsDirty = false;
|
||||
|
||||
protected:
|
||||
bool codeHashDirty = false;
|
||||
bool opdescHashDirty = false;
|
||||
|
||||
|
@ -284,6 +288,7 @@ class PICAShader {
|
|||
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
}
|
||||
uniformsDirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -295,6 +300,12 @@ class PICAShader {
|
|||
u[1] = getBits<8, 8>(word);
|
||||
u[2] = getBits<16, 8>(word);
|
||||
u[3] = getBits<24, 8>(word);
|
||||
uniformsDirty = true;
|
||||
}
|
||||
|
||||
// Overwrites the bool uniform word with "value" and flags the uniforms as dirty
void uploadBoolUniform(u32 value) {
	boolUniform = value;
	uniformsDirty = true;
}
|
||||
|
||||
void run();
|
||||
|
@ -302,6 +313,10 @@ class PICAShader {
|
|||
|
||||
Hash getCodeHash();
|
||||
Hash getOpdescHash();
|
||||
|
||||
// Returns how big the PICA uniforms are combined. Used for hw accelerated shaders where we upload the uniforms to our GPU.
|
||||
static constexpr usize totalUniformSize() { return sizeof(floatUniforms) + sizeof(intUniforms) + sizeof(boolUniform); }
|
||||
// Returns a pointer to the start of floatUniforms
// NOTE(review): presumably read for totalUniformSize() bytes, which assumes floatUniforms, intUniforms and boolUniform
// are laid out back to back with no padding — confirm against the class layout
void* getUniformPointer() { return static_cast<void*>(&floatUniforms); }
|
||||
};
|
||||
|
||||
static_assert(
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
#pragma once
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
|
@ -41,9 +44,12 @@ namespace PICA::ShaderGen {
|
|||
explicit Function(u32 start, u32 end) : start(start), end(end) {}
|
||||
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
|
||||
|
||||
std::string getIdentifier() const { return "func_" + std::to_string(start) + "_to_" + std::to_string(end); }
|
||||
std::string getForwardDecl() const { return "void " + getIdentifier() + "();\n"; }
|
||||
std::string getCallStatement() const { return getIdentifier() + "()"; }
|
||||
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
|
||||
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
|
||||
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
|
||||
// from within functions deep in the callstack
|
||||
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
|
||||
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
|
||||
};
|
||||
|
||||
std::set<Function> functions{};
|
||||
|
@ -93,9 +99,11 @@ namespace PICA::ShaderGen {
|
|||
|
||||
API api;
|
||||
Language language;
|
||||
bool compilationError = false;
|
||||
|
||||
void compileInstruction(u32& pc, bool& finished);
|
||||
void compileRange(const AddressRange& range);
|
||||
// Compiles the range "range" and returns the end PC, along with whether we're "finished" with the program (i.e. hit an END instruction)
|
||||
std::pair<u32, bool> compileRange(const AddressRange& range);
|
||||
void callFunction(const Function& function);
|
||||
const Function* findFunction(const AddressRange& range);
|
||||
|
||||
|
@ -105,6 +113,7 @@ namespace PICA::ShaderGen {
|
|||
std::string getDest(u32 dest) const;
|
||||
std::string getSwizzlePattern(u32 swizzle) const;
|
||||
std::string getDestSwizzle(u32 destinationMask) const;
|
||||
const char* getCondition(u32 cond, u32 refX, u32 refY);
|
||||
|
||||
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
|
||||
// Returns if the instruction uses the typical register encodings most instructions use
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/pica_frag_config.hpp"
|
||||
#include "PICA/pica_vert_config.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_gen_types.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
@ -31,6 +32,8 @@ namespace PICA::ShaderGen {
|
|||
FragmentGenerator(API api, Language language) : api(api), language(language) {}
|
||||
std::string generate(const PICA::FragmentConfig& config, void* driverInfo = nullptr);
|
||||
std::string getDefaultVertexShader();
|
||||
// For when PICA shader acceleration is enabled. Turns the PICA shader source into a proper vertex shader
|
||||
std::string getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader);
|
||||
|
||||
void setTarget(API api, Language language) {
|
||||
this->api = api;
|
||||
|
|
|
@ -2,10 +2,9 @@
|
|||
#include "PICA/shader.hpp"
|
||||
|
||||
class ShaderUnit {
|
||||
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
public:
|
||||
PICAShader vs; // Vertex shader
|
||||
PICAShader gs; // Geometry shader
|
||||
|
||||
ShaderUnit() : vs(ShaderType::Vertex), gs(ShaderType::Geometry) {}
|
||||
void reset();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue