mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-06-03 12:27:21 +12:00
Merge pull request #98 from Wunkolo/modular-gl
Allow conditional OpenGL rendering backend
This commit is contained in:
commit
786c3e8a5c
18 changed files with 545 additions and 407 deletions
|
@ -21,7 +21,7 @@ class ShaderJIT {
|
|||
ShaderCache cache;
|
||||
#endif
|
||||
|
||||
public:
|
||||
public:
|
||||
#ifdef PANDA3DS_SHADER_JIT_SUPPORTED
|
||||
// Call this before starting to process a batch of vertices
|
||||
// This will read the PICA config (uploaded shader and shader operand descriptors) and search if we've already compiled this shader
|
||||
|
@ -29,9 +29,7 @@ public:
|
|||
// The caller must make sure the entrypoint has been properly set beforehand
|
||||
void prepare(PICAShader& shaderUnit);
|
||||
void reset();
|
||||
void run(PICAShader& shaderUnit) {
|
||||
prologueCallback(shaderUnit, entrypointCallback);
|
||||
}
|
||||
void run(PICAShader& shaderUnit) { prologueCallback(shaderUnit, entrypointCallback); }
|
||||
|
||||
static constexpr bool isAvailable() { return true; }
|
||||
#else
|
||||
|
@ -44,7 +42,7 @@ public:
|
|||
}
|
||||
|
||||
// Define dummy callback. This should never be called if the shader JIT is not supported
|
||||
using Callback = void(*)(PICAShader& shaderUnit);
|
||||
using Callback = void (*)(PICAShader& shaderUnit);
|
||||
Callback activeShaderCallback = nullptr;
|
||||
|
||||
void reset() {}
|
||||
|
|
|
@ -2,17 +2,17 @@
|
|||
|
||||
// Only do anything if we're on an x64 target with JIT support enabled
|
||||
#if defined(PANDA3DS_DYNAPICA_SUPPORTED) && defined(PANDA3DS_X64_HOST)
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "PICA/shader.hpp"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
#include "x64_regs.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "x64_regs.hpp"
|
||||
#include "xbyak/xbyak.h"
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
class ShaderEmitter : public Xbyak::CodeGenerator {
|
||||
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
||||
static constexpr size_t executableMemorySize = PICAShader::maxInstructionCount * 96; // How much executable memory to alloc for each shader
|
||||
// Allocate some extra space as padding for security purposes in the extremely unlikely occasion we manage to overflow the above size
|
||||
static constexpr size_t allocSize = executableMemorySize + 0x1000;
|
||||
|
||||
|
@ -20,7 +20,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
static constexpr uint noSwizzle = 0x1B;
|
||||
|
||||
using f24 = Floats::f24;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
using vec4f = std::array<f24, 4>;
|
||||
|
||||
// An array of labels (incl pointers) to each compiled (to x64) PICA instruction
|
||||
std::array<Xbyak::Label, PICAShader::maxInstructionCount> instructionLabels;
|
||||
|
@ -33,8 +33,8 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
// Vector value of (1.0, 1.0, 1.0, 1.0) for SLT(i)/SGE(i)
|
||||
Label onesVector;
|
||||
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
u32 recompilerPC = 0; // PC the recompiler is currently recompiling @
|
||||
u32 loopLevel = 0; // The current loop nesting level (0 = not in a loop)
|
||||
|
||||
bool haveSSE4_1 = false; // Shows if the CPU supports SSE4.1
|
||||
bool haveAVX = false; // Shows if the CPU supports AVX (NOT AVX2, NOT AVX512. Regular AVX)
|
||||
|
@ -116,10 +116,12 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
|
||||
MAKE_LOG_FUNCTION(log, shaderJITLogger)
|
||||
|
||||
public:
|
||||
using InstructionCallback = const void(*)(PICAShader& shaderUnit); // Callback type used for instructions
|
||||
public:
|
||||
// Callback type used for instructions
|
||||
using InstructionCallback = const void (*)(PICAShader& shaderUnit);
|
||||
// Callback type used for the JIT prologue. This is what the caller will call
|
||||
using PrologueCallback = const void(*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||
using PrologueCallback = const void (*)(PICAShader& shaderUnit, InstructionCallback cb);
|
||||
|
||||
PrologueCallback prologueCb = nullptr;
|
||||
|
||||
// Initialize our emitter with "allocSize" bytes of RWX memory
|
||||
|
@ -134,7 +136,7 @@ public:
|
|||
Helpers::panic("This CPU does not support SSE3. Please use the shader interpreter instead");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void compile(const PICAShader& shaderUnit);
|
||||
|
||||
// PC must be a valid entrypoint here. It doesn't have that much overhead in this case, so we use std::array<>::at() to assert it does
|
||||
|
@ -144,9 +146,7 @@ public:
|
|||
return reinterpret_cast<InstructionCallback>(ptr);
|
||||
}
|
||||
|
||||
PrologueCallback getPrologueCallback() {
|
||||
return prologueCb;
|
||||
}
|
||||
PrologueCallback getPrologueCallback() { return prologueCb; }
|
||||
};
|
||||
|
||||
#endif // x64 recompiler check
|
||||
#endif // x64 recompiler check
|
|
@ -1,39 +1,39 @@
|
|||
#pragma once
|
||||
#include <array>
|
||||
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_unit.hpp"
|
||||
#include "config.hpp"
|
||||
#include "helpers.hpp"
|
||||
#include "logger.hpp"
|
||||
#include "memory.hpp"
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "PICA/shader_unit.hpp"
|
||||
#include "PICA/dynapica/shader_rec.hpp"
|
||||
#include "renderer_gl/renderer_gl.hpp"
|
||||
#include "PICA/pica_vertex.hpp"
|
||||
#include "renderer.hpp"
|
||||
|
||||
class GPU {
|
||||
static constexpr u32 regNum = 0x300;
|
||||
using vec4f = OpenGL::Vector<Floats::f24, 4>;
|
||||
using vec4f = std::array<Floats::f24, 4>;
|
||||
using Registers = std::array<u32, regNum>;
|
||||
|
||||
Memory& mem;
|
||||
EmulatorConfig& config;
|
||||
ShaderUnit shaderUnit;
|
||||
ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported
|
||||
ShaderJIT shaderJIT; // Doesn't do anything if JIT is disabled or not supported
|
||||
|
||||
u8* vram = nullptr;
|
||||
MAKE_LOG_FUNCTION(log, gpuLogger)
|
||||
|
||||
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
|
||||
static constexpr u32 maxAttribCount = 12; // Up to 12 vertex attributes
|
||||
static constexpr u32 vramSize = u32(6_MB);
|
||||
Registers regs; // GPU internal registers
|
||||
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
|
||||
Registers regs; // GPU internal registers
|
||||
std::array<vec4f, 16> currentAttributes; // Vertex attributes before being passed to the shader
|
||||
|
||||
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
||||
std::array<vec4f, 16> immediateModeAttributes; // Vertex attributes uploaded via immediate mode submission
|
||||
std::array<PICA::Vertex, 3> immediateModeVertices;
|
||||
uint immediateModeVertIndex;
|
||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||
uint immediateModeAttrIndex; // Index of the immediate mode attribute we're uploading
|
||||
|
||||
template <bool indexed, bool useShaderJIT>
|
||||
void drawArrays();
|
||||
|
@ -42,35 +42,33 @@ class GPU {
|
|||
void drawArrays(bool indexed);
|
||||
|
||||
struct AttribInfo {
|
||||
u32 offset = 0; // Offset from base vertex array
|
||||
int size = 0; // Bytes per vertex
|
||||
u32 offset = 0; // Offset from base vertex array
|
||||
int size = 0; // Bytes per vertex
|
||||
u32 config1 = 0;
|
||||
u32 config2 = 0;
|
||||
u32 componentCount = 0; // Number of components for the attribute
|
||||
u32 componentCount = 0; // Number of components for the attribute
|
||||
|
||||
u64 getConfigFull() {
|
||||
return u64(config1) | (u64(config2) << 32);
|
||||
}
|
||||
u64 getConfigFull() { return u64(config1) | (u64(config2) << 32); }
|
||||
};
|
||||
|
||||
u64 getVertexShaderInputConfig() {
|
||||
return u64(regs[PICA::InternalRegs::VertexShaderInputCfgLow]) | (u64(regs[PICA::InternalRegs::VertexShaderInputCfgHigh]) << 32);
|
||||
}
|
||||
|
||||
std::array<AttribInfo, maxAttribCount> attributeInfo; // Info for each of the 12 attributes
|
||||
u32 totalAttribCount = 0; // Number of vertex attributes to send to VS
|
||||
u32 fixedAttribMask = 0; // Which attributes are fixed?
|
||||
|
||||
u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range)
|
||||
u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted
|
||||
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
||||
std::array<AttribInfo, maxAttribCount> attributeInfo; // Info for each of the 12 attributes
|
||||
u32 totalAttribCount = 0; // Number of vertex attributes to send to VS
|
||||
u32 fixedAttribMask = 0; // Which attributes are fixed?
|
||||
|
||||
u32 fixedAttribIndex = 0; // Which fixed attribute are we writing to ([0, 11] range)
|
||||
u32 fixedAttribCount = 0; // How many attribute components have we written? When we get to 4 the attr will actually get submitted
|
||||
std::array<u32, 3> fixedAttrBuff; // Buffer to hold fixed attributes in until they get submitted
|
||||
|
||||
// Command processor pointers for GPU command lists
|
||||
u32* cmdBuffStart = nullptr;
|
||||
u32* cmdBuffEnd = nullptr;
|
||||
u32* cmdBuffCurr = nullptr;
|
||||
|
||||
Renderer renderer;
|
||||
std::unique_ptr<Renderer> renderer;
|
||||
PICA::Vertex getImmediateModeVertex();
|
||||
|
||||
public:
|
||||
|
@ -84,11 +82,10 @@ class GPU {
|
|||
// Set to false by the renderer when the lighting_lut is uploaded to the GPU
|
||||
bool lightingLUTDirty = false;
|
||||
|
||||
GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config);
|
||||
void initGraphicsContext() { renderer.initGraphicsContext(); }
|
||||
void getGraphicsContext() { renderer.getGraphicsContext(); }
|
||||
void display() { renderer.display(); }
|
||||
void screenshot(const std::string& name) { renderer.screenshot(name); }
|
||||
GPU(Memory& mem, EmulatorConfig& config);
|
||||
void initGraphicsContext() { renderer->initGraphicsContext(); }
|
||||
void display() { renderer->display(); }
|
||||
void screenshot(const std::string& name) { renderer->screenshot(name); }
|
||||
|
||||
void fireDMA(u32 dest, u32 source, u32 size);
|
||||
void reset();
|
||||
|
@ -107,13 +104,13 @@ class GPU {
|
|||
// TODO: Emulate the transfer engine & its registers
|
||||
// Then this can be emulated by just writing the appropriate values there
|
||||
void clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
|
||||
renderer.clearBuffer(startAddress, endAddress, value, control);
|
||||
renderer->clearBuffer(startAddress, endAddress, value, control);
|
||||
}
|
||||
|
||||
// TODO: Emulate the transfer engine & its registers
|
||||
// Then this can be emulated by just writing the appropriate values there
|
||||
void displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
renderer.displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags);
|
||||
renderer->displayTransfer(inputAddr, outputAddr, inputSize, outputSize, flags);
|
||||
}
|
||||
|
||||
// Read a value of type T from physical address paddr
|
||||
|
|
|
@ -2,13 +2,14 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include "helpers.hpp"
|
||||
#include "opengl.hpp"
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/pica_hash.hpp"
|
||||
#include "helpers.hpp"
|
||||
|
||||
enum class ShaderType {
|
||||
Vertex, Geometry
|
||||
Vertex,
|
||||
Geometry,
|
||||
};
|
||||
|
||||
namespace ShaderOpcodes {
|
||||
|
@ -46,66 +47,66 @@ namespace ShaderOpcodes {
|
|||
SETEMIT = 0x2B,
|
||||
JMPC = 0x2C,
|
||||
JMPU = 0x2D,
|
||||
CMP1 = 0x2E, // Both of these instructions are CMP
|
||||
CMP1 = 0x2E, // Both of these instructions are CMP
|
||||
CMP2 = 0x2F,
|
||||
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
|
||||
MAD = 0x38 // Everything between 0x38-0x3F is a MAD but fuck it
|
||||
};
|
||||
}
|
||||
|
||||
// Note: All PICA f24 vec4 registers must have the alignas(16) specifier to make them easier to access in SSE/NEON code in the JIT
|
||||
class PICAShader {
|
||||
using f24 = Floats::f24;
|
||||
using vec4f = OpenGL::Vector<f24, 4>;
|
||||
using vec4f = std::array<f24, 4>;
|
||||
|
||||
struct Loop {
|
||||
u32 startingPC; // PC at the start of the loop
|
||||
u32 endingPC; // PC at the end of the loop
|
||||
u32 iterations; // How many iterations of the loop to run
|
||||
u32 increment; // How much to increment the loop counter after each iteration
|
||||
u32 startingPC; // PC at the start of the loop
|
||||
u32 endingPC; // PC at the end of the loop
|
||||
u32 iterations; // How many iterations of the loop to run
|
||||
u32 increment; // How much to increment the loop counter after each iteration
|
||||
};
|
||||
|
||||
// Info for ifc/ifu stack
|
||||
struct ConditionalInfo {
|
||||
u32 endingPC; // PC at the end of the if block (= DST)
|
||||
u32 newPC; // PC after the if block is done executing (= DST + NUM)
|
||||
u32 endingPC; // PC at the end of the if block (= DST)
|
||||
u32 newPC; // PC after the if block is done executing (= DST + NUM)
|
||||
};
|
||||
|
||||
struct CallInfo {
|
||||
u32 endingPC; // PC at the end of the function
|
||||
u32 returnPC; // PC to return to after the function ends
|
||||
u32 endingPC; // PC at the end of the function
|
||||
u32 returnPC; // PC to return to after the function ends
|
||||
};
|
||||
|
||||
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
|
||||
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
|
||||
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
|
||||
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
|
||||
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
|
||||
int bufferIndex; // Index of the next instruction to overwrite for shader uploads
|
||||
int opDescriptorIndex; // Index of the next operand descriptor we'll overwrite
|
||||
u32 floatUniformIndex = 0; // Which float uniform are we writing to? ([0, 95] range)
|
||||
u32 floatUniformWordCount = 0; // How many words have we buffered for the current uniform transfer?
|
||||
bool f32UniformTransfer = false; // Are we transferring an f32 uniform or an f24 uniform?
|
||||
|
||||
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
||||
std::array<u32, 4> floatUniformBuffer; // Buffer for temporarily caching float uniform data
|
||||
|
||||
public:
|
||||
public:
|
||||
// These are placed close to the temp registers and co because it helps the JIT generate better code
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 entrypoint = 0; // Initial shader PC
|
||||
u32 boolUniform;
|
||||
std::array<OpenGL::Vector<u8, 4>, 4> intUniforms;
|
||||
std::array<std::array<u8, 4>, 4> intUniforms;
|
||||
alignas(16) std::array<vec4f, 96> floatUniforms;
|
||||
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> fixedAttributes; // Fixed vertex attributes
|
||||
alignas(16) std::array<vec4f, 16> inputs; // Attributes passed to the shader
|
||||
alignas(16) std::array<vec4f, 16> outputs;
|
||||
alignas(16) vec4f dummy = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() }); // Dummy register used by the JIT
|
||||
alignas(16) vec4f dummy = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()}); // Dummy register used by the JIT
|
||||
|
||||
protected:
|
||||
protected:
|
||||
std::array<u32, 128> operandDescriptors;
|
||||
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
||||
OpenGL::Vector<s32, 2> addrRegister; // Address register
|
||||
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
||||
alignas(16) std::array<vec4f, 16> tempRegisters; // General purpose registers the shader can use for temp values
|
||||
std::array<s32, 2> addrRegister; // Address register
|
||||
bool cmpRegister[2]; // Comparison registers where the result of CMP is stored in
|
||||
u32 loopCounter;
|
||||
|
||||
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
|
||||
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
|
||||
u32 ifIndex = 0; // The index of our IF stack
|
||||
u32 callIndex = 0; // The index of our CALL stack
|
||||
u32 pc = 0; // Program counter: Index of the next instruction we're going to execute
|
||||
u32 loopIndex = 0; // The index of our loop stack (0 = empty, 4 = full)
|
||||
u32 ifIndex = 0; // The index of our IF stack
|
||||
u32 callIndex = 0; // The index of our CALL stack
|
||||
|
||||
std::array<Loop, 4> loopInfo;
|
||||
std::array<ConditionalInfo, 8> conditionalInfo;
|
||||
|
@ -117,7 +118,7 @@ protected:
|
|||
// Ideally we want to be able to support multiple different types of hash depending on compilation settings, but let's get this working first
|
||||
using Hash = PICAHash::HashType;
|
||||
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastCodeHash = 0; // Last hash computed for the shader code (Used for the JIT caching mechanism)
|
||||
Hash lastOpdescHash = 0; // Last hash computed for the operand descriptors (Also used for the JIT)
|
||||
|
||||
bool codeHashDirty = false;
|
||||
|
@ -130,7 +131,7 @@ protected:
|
|||
vec4f getSource(u32 source);
|
||||
vec4f& getDest(u32 dest);
|
||||
|
||||
private:
|
||||
private:
|
||||
// Interpreter functions for the various shader functions
|
||||
void add(u32 instruction);
|
||||
void call(u32 instruction);
|
||||
|
@ -171,13 +172,13 @@ private:
|
|||
bool negate;
|
||||
|
||||
using namespace Helpers;
|
||||
if constexpr (sourceIndex == 1) { // SRC1
|
||||
if constexpr (sourceIndex == 1) { // SRC1
|
||||
negate = (getBit<4>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<5, 8>(opDescriptor);
|
||||
} else if constexpr (sourceIndex == 2) { // SRC2
|
||||
} else if constexpr (sourceIndex == 2) { // SRC2
|
||||
negate = (getBit<13>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<14, 8>(opDescriptor);
|
||||
} else if constexpr (sourceIndex == 3) { // SRC3
|
||||
} else if constexpr (sourceIndex == 3) { // SRC3
|
||||
negate = (getBit<22>(opDescriptor)) != 0;
|
||||
compSwizzle = getBits<23, 8>(opDescriptor);
|
||||
}
|
||||
|
@ -185,8 +186,8 @@ private:
|
|||
// Iterate through every component of the swizzled vector in reverse order
|
||||
// And get which source component's index to match it with
|
||||
for (int comp = 0; comp < 4; comp++) {
|
||||
int index = compSwizzle & 3; // Get index for this component
|
||||
compSwizzle >>= 2; // Move to next component index
|
||||
int index = compSwizzle & 3; // Get index for this component
|
||||
compSwizzle >>= 2; // Move to next component index
|
||||
ret[3 - comp] = source[index];
|
||||
}
|
||||
|
||||
|
@ -212,39 +213,35 @@ private:
|
|||
u8 getIndexedSource(u32 source, u32 index);
|
||||
bool isCondTrue(u32 instruction);
|
||||
|
||||
public:
|
||||
public:
|
||||
static constexpr size_t maxInstructionCount = 4096;
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
std::array<u32, maxInstructionCount> loadedShader; // Currently loaded & active shader
|
||||
std::array<u32, maxInstructionCount> bufferedShader; // Shader to be transferred when the SH_CODETRANSFER_END reg gets written to
|
||||
|
||||
PICAShader(ShaderType type) : type(type) {}
|
||||
|
||||
// These functions are in the header to be inlined more easily, though with LTO I hope I'll be able to move them
|
||||
void finalize() {
|
||||
std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32));
|
||||
}
|
||||
void finalize() { std::memcpy(&loadedShader[0], &bufferedShader[0], 4096 * sizeof(u32)); }
|
||||
|
||||
void setBufferIndex(u32 index) {
|
||||
bufferIndex = index & 0xfff;
|
||||
}
|
||||
|
||||
void setOpDescriptorIndex(u32 index) {
|
||||
opDescriptorIndex = index & 0x7f;
|
||||
}
|
||||
void setBufferIndex(u32 index) { bufferIndex = index & 0xfff; }
|
||||
void setOpDescriptorIndex(u32 index) { opDescriptorIndex = index & 0x7f; }
|
||||
|
||||
void uploadWord(u32 word) {
|
||||
if (bufferIndex >= 4095) Helpers::panic("o no, shader upload overflew");
|
||||
if (bufferIndex >= 4095) {
|
||||
Helpers::panic("o no, shader upload overflew");
|
||||
}
|
||||
|
||||
bufferedShader[bufferIndex++] = word;
|
||||
bufferIndex &= 0xfff;
|
||||
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
codeHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
}
|
||||
|
||||
void uploadDescriptor(u32 word) {
|
||||
operandDescriptors[opDescriptorIndex++] = word;
|
||||
opDescriptorIndex &= 0x7f;
|
||||
|
||||
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
opdescHashDirty = true; // Signal the JIT if necessary that the program hash has potentially changed
|
||||
}
|
||||
|
||||
void setFloatUniformIndex(u32 word) {
|
||||
|
@ -255,23 +252,24 @@ public:
|
|||
|
||||
void uploadFloatUniform(u32 word) {
|
||||
floatUniformBuffer[floatUniformWordCount++] = word;
|
||||
if (floatUniformIndex >= 96)
|
||||
if (floatUniformIndex >= 96) {
|
||||
Helpers::panic("[PICA] Tried to write float uniform %d", floatUniformIndex);
|
||||
}
|
||||
|
||||
if ((f32UniformTransfer && floatUniformWordCount >= 4) || (!f32UniformTransfer && floatUniformWordCount >= 3)) {
|
||||
vec4f& uniform = floatUniforms[floatUniformIndex++];
|
||||
floatUniformWordCount = 0;
|
||||
|
||||
if (f32UniformTransfer) {
|
||||
uniform.x() = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||
uniform.y() = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||
uniform.z() = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
||||
uniform.w() = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
||||
uniform[0] = f24::fromFloat32(*(float*)&floatUniformBuffer[3]);
|
||||
uniform[1] = f24::fromFloat32(*(float*)&floatUniformBuffer[2]);
|
||||
uniform[2] = f24::fromFloat32(*(float*)&floatUniformBuffer[1]);
|
||||
uniform[3] = f24::fromFloat32(*(float*)&floatUniformBuffer[0]);
|
||||
} else {
|
||||
uniform.x() = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
||||
uniform.y() = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
||||
uniform.z() = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform.w() = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
uniform[0] = f24::fromRaw(floatUniformBuffer[2] & 0xffffff);
|
||||
uniform[1] = f24::fromRaw(((floatUniformBuffer[1] & 0xffff) << 8) | (floatUniformBuffer[2] >> 24));
|
||||
uniform[2] = f24::fromRaw(((floatUniformBuffer[0] & 0xff) << 16) | (floatUniformBuffer[1] >> 16));
|
||||
uniform[3] = f24::fromRaw(floatUniformBuffer[0] >> 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -280,10 +278,10 @@ public:
|
|||
using namespace Helpers;
|
||||
|
||||
auto& u = intUniforms[index];
|
||||
u.x() = word & 0xff;
|
||||
u.y() = getBits<8, 8>(word);
|
||||
u.z() = getBits<16, 8>(word);
|
||||
u.w() = getBits<24, 8>(word);
|
||||
u[0] = word & 0xff;
|
||||
u[1] = getBits<8, 8>(word);
|
||||
u[2] = getBits<16, 8>(word);
|
||||
u[3] = getBits<24, 8>(word);
|
||||
}
|
||||
|
||||
void run();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue