mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 06:05:40 +12:00
* Renderer: Add prepareForDraw callback * Add fmt submodule and port shader decompiler instructions to it * Add shader acceleration setting * Hook up vertex shaders to shader cache * Shader decompiler: Fix redundant compilations * Shader Decompiler: Fix vertex attribute upload * Shader compiler: Simplify generated code for reading and faster compilation * Further simplify shader decompiler output * Shader decompiler: More smallen-ing * Shader decompiler: Get PICA uniforms uploaded to the GPU * Shader decompiler: Readd clipping * Shader decompiler: Actually `break` on control flow instructions * Shader decompiler: More control flow handling * Shader decompiler: Fix desitnation mask * Shader Decomp: Remove pair member capture in lambda (unsupported on NDK) * Disgusting changes to handle the fact that hw shader shaders are 2x as big * Shader decompiler: Implement proper output semantic mapping * Moar instructions * Shader decompiler: Add FLR/SLT/SLTI/SGE/SGEI * Shader decompiler: Add register indexing * Shader decompiler: Optimize mova with both x and y masked * Shader decompiler: Add DPH/DPHI * Fix shader caching being broken * PICA decompiler: Cache VS uniforms * Simply vertex cache code * Simplify vertex cache code * Shader decompiler: Add loops * Shader decompiler: Implement safe multiplication * Shader decompiler: Implement LG2/EX2 * Shader decompiler: More control flow * Shader decompiler: Fix JMPU condition * Shader decompiler: Convert main function to void * PICA: Start implementing GPU vertex fetch * More hw VAO work * More hw VAO work * More GPU vertex fetch code * Add GL Stream Buffer from Duckstation * GL: Actually upload data to stream buffers * GPU: Cleanup immediate mode handling * Get first renders working with accelerated draws * Shader decompiler: Fix control flow analysis bugs * HW shaders: Accelerate indexed draws * Shader decompiler: Add support for compilation errors * GLSL decompiler: Fall back for LITP * Add Renderdoc scope classes * Fix control flow analysis bug * HW shaders: Fix attribute fetch * Rewriting hw vertex fetch * Stream buffer: Fix copy-paste mistake * HW shaders: Fix indexed rendering * HW shaders: Add padding attributes * HW shaders: Avoid redundant glVertexAttrib4f calls * HW shaders: Fix loops * HW shaders: Make generated shaders slightly smaller * Fix libretro build * HW shaders: Fix android * Remove redundant ubershader checks * Set accelerate shader default to true * Shader decompiler: Don't declare VS input attributes as an array * Change ubuntu-latest to Ubuntu 24.04 because Microsoft screwed up their CI again * fix merge conflict bug
131 lines
No EOL
5.1 KiB
C++
131 lines
No EOL
5.1 KiB
C++
#pragma once
|
|
#include <fmt/format.h>
|
|
|
|
#include <map>
|
|
#include <set>
|
|
#include <string>
|
|
#include <tuple>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include "PICA/shader.hpp"
|
|
#include "PICA/shader_gen_types.hpp"
|
|
|
|
struct EmulatorConfig;
|
|
|
|
namespace PICA::ShaderGen {
|
|
// Control flow analysis is partially based on
|
|
// https://github.com/PabloMK7/citra/blob/d0179559466ff09731d74474322ee880fbb44b00/src/video_core/shader/generator/glsl_shader_decompiler.cpp#L33
|
|
struct ControlFlow {
|
|
// A continuous range of addresses
|
|
struct AddressRange {
|
|
u32 start, end;
|
|
AddressRange(u32 start, u32 end) : start(start), end(end) {}
|
|
|
|
// Use lexicographic comparison for functions in order to sort them in a set
|
|
bool operator<(const AddressRange& other) const { return std::tie(start, end) < std::tie(other.start, other.end); }
|
|
};
|
|
|
|
struct Function {
|
|
using Labels = std::set<u32>;
|
|
|
|
enum class ExitMode {
|
|
Unknown, // Can't guarantee whether we'll exit properly, fall back to CPU shaders (can happen with jmp shenanigans)
|
|
AlwaysReturn, // All paths reach the return point.
|
|
Conditional, // One or more code paths reach the return point or an END instruction conditionally.
|
|
AlwaysEnd, // All paths reach an END instruction.
|
|
};
|
|
|
|
u32 start; // Starting PC of the function
|
|
u32 end; // End PC of the function
|
|
Labels outLabels{}; // Labels this function can "goto" (jump) to
|
|
ExitMode exitMode = ExitMode::Unknown;
|
|
|
|
explicit Function(u32 start, u32 end) : start(start), end(end) {}
|
|
bool operator<(const Function& other) const { return AddressRange(start, end) < AddressRange(other.start, other.end); }
|
|
|
|
std::string getIdentifier() const { return fmt::format("fn_{}_{}", start, end); }
|
|
// To handle weird control flow, we have to return from each function a bool that indicates whether or not the shader reached an end
|
|
// instruction and should thus terminate. This is necessary for games like Rayman and Gravity Falls, which have "END" instructions called
|
|
// from within functions deep in the callstack
|
|
std::string getForwardDecl() const { return fmt::format("bool fn_{}_{}();\n", start, end); }
|
|
std::string getCallStatement() const { return fmt::format("fn_{}_{}()", start, end); }
|
|
};
|
|
|
|
std::set<Function> functions{};
|
|
std::map<AddressRange, Function::ExitMode> exitMap{};
|
|
|
|
// Tells us whether analysis of the shader we're trying to compile failed, in which case we'll need to fail back to shader emulation
|
|
// On the CPU
|
|
bool analysisFailed = false;
|
|
|
|
// This will recursively add all functions called by the function too, as analyzeFunction will call addFunction on control flow instructions
|
|
const Function* addFunction(const PICAShader& shader, u32 start, u32 end) {
|
|
auto searchIterator = functions.find(Function(start, end));
|
|
if (searchIterator != functions.end()) {
|
|
return &(*searchIterator);
|
|
}
|
|
|
|
// Add this function and analyze it if it doesn't already exist
|
|
Function function(start, end);
|
|
function.exitMode = analyzeFunction(shader, start, end, function.outLabels);
|
|
|
|
// This function could not be fully analyzed, report failure
|
|
if (function.exitMode == Function::ExitMode::Unknown) {
|
|
analysisFailed = true;
|
|
return nullptr;
|
|
}
|
|
|
|
// Add function to our function list
|
|
auto [it, added] = functions.insert(std::move(function));
|
|
return &(*it);
|
|
}
|
|
|
|
void analyze(const PICAShader& shader, u32 entrypoint);
|
|
Function::ExitMode analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels);
|
|
};
|
|
|
|
class ShaderDecompiler {
|
|
using AddressRange = ControlFlow::AddressRange;
|
|
using Function = ControlFlow::Function;
|
|
|
|
ControlFlow controlFlow{};
|
|
|
|
PICAShader& shader;
|
|
EmulatorConfig& config;
|
|
std::string decompiledShader;
|
|
|
|
u32 entrypoint;
|
|
|
|
API api;
|
|
Language language;
|
|
bool compilationError = false;
|
|
|
|
void compileInstruction(u32& pc, bool& finished);
|
|
// Compile range "range" and returns the end PC or if we're "finished" with the program (called an END instruction)
|
|
std::pair<u32, bool> compileRange(const AddressRange& range);
|
|
void callFunction(const Function& function);
|
|
const Function* findFunction(const AddressRange& range);
|
|
|
|
void writeAttributes();
|
|
|
|
std::string getSource(u32 source, u32 index) const;
|
|
std::string getDest(u32 dest) const;
|
|
std::string getSwizzlePattern(u32 swizzle) const;
|
|
std::string getDestSwizzle(u32 destinationMask) const;
|
|
const char* getCondition(u32 cond, u32 refX, u32 refY);
|
|
|
|
void setDest(u32 operandDescriptor, const std::string& dest, const std::string& value);
|
|
// Returns if the instruction uses the typical register encodings most instructions use
|
|
// With some exceptions like MAD/MADI, and the control flow instructions which are completely different
|
|
bool usesCommonEncoding(u32 instruction) const;
|
|
|
|
public:
|
|
ShaderDecompiler(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language)
|
|
: shader(shader), entrypoint(entrypoint), config(config), api(api), language(language), decompiledShader("") {}
|
|
|
|
std::string decompile();
|
|
};
|
|
|
|
std::string decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language);
|
|
} // namespace PICA::ShaderGen
|