mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-20 04:29:13 +12:00
Merge branch 'master' into ir
This commit is contained in:
commit
d470a8c8d3
46 changed files with 2206 additions and 1206 deletions
|
@ -1,6 +1,7 @@
|
|||
#include "config.hpp"
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#include "helpers.hpp"
|
||||
#include "toml.hpp"
|
||||
|
@ -9,6 +10,8 @@
|
|||
// We are legally allowed, as per the author's wish, to use the above code without any licensing restrictions
|
||||
// However we still want to follow the license as closely as possible and offer the proper attributions.
|
||||
|
||||
EmulatorConfig::EmulatorConfig(const std::filesystem::path& path) { load(path); }
|
||||
|
||||
void EmulatorConfig::load(const std::filesystem::path& path) {
|
||||
// If the configuration file does not exist, create it and return
|
||||
std::error_code error;
|
||||
|
@ -31,6 +34,17 @@ void EmulatorConfig::load(const std::filesystem::path& path) {
|
|||
if (gpuResult.is_ok()) {
|
||||
auto gpu = gpuResult.unwrap();
|
||||
|
||||
// Get renderer
|
||||
auto rendererName = toml::find_or<std::string>(gpu, "Renderer", "OpenGL");
|
||||
auto configRendererType = Renderer::typeFromString(rendererName);
|
||||
|
||||
if (configRendererType.has_value()) {
|
||||
rendererType = configRendererType.value();
|
||||
} else {
|
||||
Helpers::warn("Invalid renderer specified: %s\n", rendererName.c_str());
|
||||
rendererType = RendererType::OpenGL;
|
||||
}
|
||||
|
||||
shaderJitEnabled = toml::find_or<toml::boolean>(gpu, "EnableShaderJIT", false);
|
||||
}
|
||||
}
|
||||
|
@ -43,7 +57,7 @@ void EmulatorConfig::save(const std::filesystem::path& path) {
|
|||
if (std::filesystem::exists(path, error)) {
|
||||
try {
|
||||
data = toml::parse<toml::preserve_comments>(path);
|
||||
} catch (std::exception& ex) {
|
||||
} catch (const std::exception& ex) {
|
||||
Helpers::warn("Exception trying to parse config file. Exception: %s\n", ex.what());
|
||||
return;
|
||||
}
|
||||
|
@ -55,6 +69,7 @@ void EmulatorConfig::save(const std::filesystem::path& path) {
|
|||
}
|
||||
|
||||
data["GPU"]["EnableShaderJIT"] = shaderJitEnabled;
|
||||
data["GPU"]["Renderer"] = std::string(Renderer::typeToString(rendererType));
|
||||
|
||||
std::ofstream file(path, std::ios::out);
|
||||
file << data;
|
||||
|
|
|
@ -61,11 +61,14 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) {
|
|||
|
||||
// Tail call to shader code entrypoint
|
||||
jmp(arg2);
|
||||
align(16);
|
||||
// Scan the shader code for call instructions and add them to the list of possible return PCs. We need to do this because the PICA callstack works
|
||||
// Pretty weirdly
|
||||
scanForCalls(shaderUnit);
|
||||
|
||||
// Scan the code for call, exp2, log2, etc instructions which need some special care
|
||||
// After that, emit exp2 and log2 functions if the corresponding instructions are present
|
||||
scanCode(shaderUnit);
|
||||
if (codeHasExp2) exp2Func = emitExp2Func();
|
||||
if (codeHasLog2) log2Func = emitLog2Func();
|
||||
|
||||
align(16);
|
||||
// Compile every instruction in the shader
|
||||
// This sounds horrible but the PICA instruction memory is tiny, and most of the time it's padded with nops that compile to nothing
|
||||
recompilerPC = 0;
|
||||
|
@ -73,17 +76,23 @@ void ShaderEmitter::compile(const PICAShader& shaderUnit) {
|
|||
compileUntil(shaderUnit, PICAShader::maxInstructionCount);
|
||||
}
|
||||
|
||||
void ShaderEmitter::scanForCalls(const PICAShader& shaderUnit) {
|
||||
void ShaderEmitter::scanCode(const PICAShader& shaderUnit) {
|
||||
returnPCs.clear();
|
||||
|
||||
for (u32 i = 0; i < PICAShader::maxInstructionCount; i++) {
|
||||
const u32 instruction = shaderUnit.loadedShader[i];
|
||||
const u32 opcode = instruction >> 26;
|
||||
|
||||
if (isCall(instruction)) {
|
||||
const u32 num = instruction & 0xff;
|
||||
const u32 dest = getBits<10, 12>(instruction);
|
||||
const u32 returnPC = num + dest; // Add them to get the return PC
|
||||
|
||||
returnPCs.push_back(returnPC);
|
||||
} else if (opcode == ShaderOpcodes::EX2) {
|
||||
codeHasExp2 = true;
|
||||
} else if (opcode == ShaderOpcodes::LG2) {
|
||||
codeHasLog2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -877,7 +886,6 @@ void ShaderEmitter::recLOOP(const PICAShader& shader, u32 instruction) {
|
|||
loopLevel--;
|
||||
}
|
||||
|
||||
// SSE does not have a log2 instruction so we temporarily emulate this using x87 FPU
|
||||
void ShaderEmitter::recLG2(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src = getBits<12, 7>(instruction);
|
||||
|
@ -885,30 +893,16 @@ void ShaderEmitter::recLG2(const PICAShader& shader, u32 instruction) {
|
|||
const u32 dest = getBits<21, 5>(instruction);
|
||||
const u32 writeMask = getBits<0, 4>(operandDescriptor);
|
||||
|
||||
// Load swizzled source, push 1.0 to the x87 stack
|
||||
loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor);
|
||||
fld1();
|
||||
|
||||
// Push source to the x87 stack
|
||||
movd(eax, src1_xmm);
|
||||
push(rax);
|
||||
fld(dword[rsp]);
|
||||
|
||||
// Perform log2, load result to src1_xmm, write it back and undo the previous push rax
|
||||
fyl2x();
|
||||
fstp(dword[rsp]);
|
||||
movss(src1_xmm, dword[rsp]);
|
||||
add(rsp, 8);
|
||||
|
||||
// If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx
|
||||
// Otherwise we do
|
||||
call(log2Func); // Result is output in src1_xmm
|
||||
|
||||
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
|
||||
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
|
||||
}
|
||||
|
||||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
// SSE does not have an exp2 instruction so we temporarily emulate this using x87 FPU
|
||||
void ShaderEmitter::recEX2(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src = getBits<12, 7>(instruction);
|
||||
|
@ -917,31 +911,12 @@ void ShaderEmitter::recEX2(const PICAShader& shader, u32 instruction) {
|
|||
const u32 writeMask = getBits<0, 4>(operandDescriptor);
|
||||
|
||||
loadRegister<1>(src1_xmm, shader, src, idx, operandDescriptor);
|
||||
call(exp2Func); // Result is output in src1_xmm
|
||||
|
||||
// Push source to the x87 stack, then do some insane compiler-generated x87 math
|
||||
movd(eax, src1_xmm);
|
||||
push(rax);
|
||||
fld(dword[rsp]);
|
||||
|
||||
fld(st0);
|
||||
frndint();
|
||||
fsub(st1, st0);
|
||||
fxch(st1);
|
||||
f2xm1();
|
||||
fadd(dword[rip + onesVector]);
|
||||
fscale();
|
||||
|
||||
// Load result to src1_xmm, write it back and undo the previous push rax
|
||||
fstp(st1);
|
||||
fstp(dword[rsp]);
|
||||
movss(src1_xmm, dword[rsp]);
|
||||
add(rsp, 8);
|
||||
|
||||
// If we only write back the x component to the result, we needn't perform a shuffle to do res = res.xxxx
|
||||
// Otherwise we do
|
||||
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
|
||||
shufps(src1_xmm, src1_xmm, 0); // src1_xmm = src1_xmm.xxxx
|
||||
}
|
||||
|
||||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
|
@ -962,6 +937,228 @@ void ShaderEmitter::printLog(const PICAShader& shaderUnit) {
|
|||
printf("cmp: (%d, %d)\n", shaderUnit.cmpRegister[0], shaderUnit.cmpRegister[1]);
|
||||
}
|
||||
|
||||
// For EXP2/LOG2, we have permission to adjust and relicense the SSE implementation from Citra for this project from the original authors
|
||||
// So we do it since EXP2/LOG2 are pretty terrible to implement.
|
||||
// ABI: Input is in the bottom bits of src1_xmm, same for output. If the result needs swizzling, the caller must handle it
|
||||
// Assume src1, src2, scratch1, scratch2, eax and edx are all clobbered
|
||||
|
||||
Xbyak::Label ShaderEmitter::emitLog2Func() {
|
||||
Xbyak::Label subroutine;
|
||||
|
||||
// This code uses the fact that log2(float) = log2(2^exponent * mantissa)
|
||||
// = log2(2^exponent) + log2(mantissa) = exponent + log2(mantissa) where mantissa has a limited range of values
|
||||
// https://stackoverflow.com/a/45787548
|
||||
|
||||
// SSE does not have a log instruction, thus we must approximate.
|
||||
// We perform this approximation by first performing a range reduction into the range [1.0, 2.0).
|
||||
// A minimax polynomial which was fit for the function log2(x) / (x - 1) is then evaluated.
|
||||
// We multiply the result by (x - 1) then restore the result into the appropriate range.
|
||||
|
||||
// Coefficients for the minimax polynomial.
|
||||
// f(x) computes approximately log2(x) / (x - 1).
|
||||
// f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0))).
|
||||
// We align the table of coefficients to 64 bytes, so that the whole thing will fit in 1 cache line
|
||||
align(64);
|
||||
const void* c0 = getCurr();
|
||||
dd(0x3d74552f);
|
||||
const void* c1 = getCurr();
|
||||
dd(0xbeee7397);
|
||||
const void* c2 = getCurr();
|
||||
dd(0x3fbd96dd);
|
||||
const void* c3 = getCurr();
|
||||
dd(0xc02153f6);
|
||||
const void* c4 = getCurr();
|
||||
dd(0x4038d96c);
|
||||
|
||||
align(16);
|
||||
const void* negative_infinity_vector = getCurr();
|
||||
dd(0xff800000);
|
||||
dd(0xff800000);
|
||||
dd(0xff800000);
|
||||
dd(0xff800000);
|
||||
const void* default_qnan_vector = getCurr();
|
||||
dd(0x7fc00000);
|
||||
dd(0x7fc00000);
|
||||
dd(0x7fc00000);
|
||||
dd(0x7fc00000);
|
||||
|
||||
Xbyak::Label inputIsNan, inputIsZero, inputOutOfRange;
|
||||
|
||||
align(16);
|
||||
L(inputOutOfRange);
|
||||
je(inputIsZero);
|
||||
movaps(src1_xmm, xword[rip + default_qnan_vector]);
|
||||
ret();
|
||||
L(inputIsZero);
|
||||
movaps(src1_xmm, xword[rip + negative_infinity_vector]);
|
||||
ret();
|
||||
|
||||
align(16);
|
||||
L(subroutine);
|
||||
|
||||
// Here we handle edge cases: input in {NaN, 0, -Inf, Negative}.
|
||||
xorps(scratch1, scratch1);
|
||||
ucomiss(scratch1, src1_xmm);
|
||||
jp(inputIsNan);
|
||||
jae(inputOutOfRange);
|
||||
|
||||
// Split input: SRC1=MANT[1,2) SCRATCH2=Exponent
|
||||
if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
vgetexpss(scratch2, src1_xmm, src1_xmm);
|
||||
vgetmantss(src1_xmm, src1_xmm, src1_xmm, 0);
|
||||
} else {
|
||||
movd(eax, src1_xmm);
|
||||
mov(edx, eax);
|
||||
and_(eax, 0x7f800000);
|
||||
and_(edx, 0x007fffff);
|
||||
or_(edx, 0x3f800000);
|
||||
movd(src1_xmm, edx);
|
||||
// SRC1 now contains the mantissa of the input.
|
||||
shr(eax, 23);
|
||||
sub(eax, 0x7f);
|
||||
cvtsi2ss(scratch2, eax);
|
||||
// scratch2 now contains the exponent of the input.
|
||||
}
|
||||
|
||||
movss(scratch1, xword[rip + c0]);
|
||||
|
||||
// Complete computation of polynomial
|
||||
if (haveFMA3) {
|
||||
vfmadd213ss(scratch1, src1_xmm, xword[rip + c1]);
|
||||
vfmadd213ss(scratch1, src1_xmm, xword[rip + c2]);
|
||||
vfmadd213ss(scratch1, src1_xmm, xword[rip + c3]);
|
||||
vfmadd213ss(scratch1, src1_xmm, xword[rip + c4]);
|
||||
subss(src1_xmm, dword[rip + onesVector]);
|
||||
vfmadd231ss(scratch2, scratch1, src1_xmm);
|
||||
} else {
|
||||
mulss(scratch1, src1_xmm);
|
||||
addss(scratch1, xword[rip + c1]);
|
||||
mulss(scratch1, src1_xmm);
|
||||
addss(scratch1, xword[rip + c2]);
|
||||
mulss(scratch1, src1_xmm);
|
||||
addss(scratch1, xword[rip + c3]);
|
||||
mulss(scratch1, src1_xmm);
|
||||
subss(src1_xmm, dword[rip + onesVector]);
|
||||
addss(scratch1, xword[rip + c4]);
|
||||
mulss(scratch1, src1_xmm);
|
||||
addss(scratch2, scratch1);
|
||||
}
|
||||
|
||||
xorps(src1_xmm, src1_xmm); // break dependency chain
|
||||
movss(src1_xmm, scratch2);
|
||||
L(inputIsNan);
|
||||
|
||||
ret();
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
Xbyak::Label ShaderEmitter::emitExp2Func() {
|
||||
Xbyak::Label subroutine;
|
||||
|
||||
// SSE does not have an exp instruction, thus we must approximate.
|
||||
// We perform this approximation by first performing a range reduction into the range [-0.5, 0.5).
|
||||
// A minimax polynomial which was fit for the function exp2(x) is then evaluated.
|
||||
// We then restore the result into the appropriate range.
|
||||
|
||||
// Similarly to log2, we align our literal pool to 64 bytes to make sure the whole thing fits in 1 cache line
|
||||
align(64);
|
||||
const void* input_max = getCurr();
|
||||
dd(0x43010000);
|
||||
const void* input_min = getCurr();
|
||||
dd(0xc2fdffff);
|
||||
const void* c0 = getCurr();
|
||||
dd(0x3c5dbe69);
|
||||
const void* half = getCurr();
|
||||
dd(0x3f000000);
|
||||
const void* c1 = getCurr();
|
||||
dd(0x3d5509f9);
|
||||
const void* c2 = getCurr();
|
||||
dd(0x3e773cc5);
|
||||
const void* c3 = getCurr();
|
||||
dd(0x3f3168b3);
|
||||
const void* c4 = getCurr();
|
||||
dd(0x3f800016);
|
||||
|
||||
Xbyak::Label retLabel;
|
||||
|
||||
align(16);
|
||||
L(subroutine);
|
||||
|
||||
// Handle edge cases
|
||||
ucomiss(src1_xmm, src1_xmm);
|
||||
jp(retLabel);
|
||||
|
||||
// Decompose input:
|
||||
// SCRATCH=2^round(input)
|
||||
// SRC1=input-round(input) [-0.5, 0.5)
|
||||
if (cpuCaps.has(Cpu::tAVX512F | Cpu::tAVX512VL)) {
|
||||
// Cheat a bit and store ones in src2 since the register is unused
|
||||
vmovaps(src2_xmm, xword[rip + onesVector]);
|
||||
// input - 0.5
|
||||
vsubss(scratch1, src1_xmm, xword[rip + half]);
|
||||
|
||||
// trunc(input - 0.5)
|
||||
vrndscaless(scratch2, scratch1, scratch1, _MM_FROUND_TRUNC);
|
||||
|
||||
// SCRATCH = 1 * 2^(trunc(input - 0.5))
|
||||
vscalefss(scratch1, src2_xmm, scratch2);
|
||||
|
||||
// SRC1 = input-trunc(input - 0.5)
|
||||
vsubss(src1_xmm, src1_xmm, scratch2);
|
||||
} else {
|
||||
// Clamp to maximum range since we shift the value directly into the exponent.
|
||||
minss(src1_xmm, xword[rip + input_max]);
|
||||
maxss(src1_xmm, xword[rip + input_min]);
|
||||
|
||||
if (cpuCaps.has(Cpu::tAVX)) {
|
||||
vsubss(scratch1, src1_xmm, xword[rip + half]);
|
||||
} else {
|
||||
movss(scratch1, src1_xmm);
|
||||
subss(scratch1, xword[rip + half]);
|
||||
}
|
||||
|
||||
if (cpuCaps.has(Cpu::tSSE41)) {
|
||||
roundss(scratch1, scratch1, _MM_FROUND_TRUNC);
|
||||
cvtss2si(eax, scratch1);
|
||||
} else {
|
||||
cvtss2si(eax, scratch1);
|
||||
cvtsi2ss(scratch1, eax);
|
||||
}
|
||||
// SCRATCH now contains input rounded to the nearest integer.
|
||||
add(eax, 0x7f);
|
||||
subss(src1_xmm, scratch1);
|
||||
// SRC1 contains input - round(input), which is in [-0.5, 0.5).
|
||||
shl(eax, 23);
|
||||
movd(scratch1, eax);
|
||||
// SCRATCH contains 2^(round(input)).
|
||||
}
|
||||
|
||||
// Complete computation of polynomial.
|
||||
movss(scratch2, xword[rip + c0]);
|
||||
|
||||
if (haveFMA3) {
|
||||
vfmadd213ss(scratch2, src1_xmm, xword[rip + c1]);
|
||||
vfmadd213ss(scratch2, src1_xmm, xword[rip + c2]);
|
||||
vfmadd213ss(scratch2, src1_xmm, xword[rip + c3]);
|
||||
vfmadd213ss(src1_xmm, scratch2, xword[rip + c4]);
|
||||
} else {
|
||||
mulss(scratch2, src1_xmm);
|
||||
addss(scratch2, xword[rip + c1]);
|
||||
mulss(scratch2, src1_xmm);
|
||||
addss(scratch2, xword[rip + c2]);
|
||||
mulss(scratch2, src1_xmm);
|
||||
addss(scratch2, xword[rip + c3]);
|
||||
mulss(src1_xmm, scratch2);
|
||||
addss(src1_xmm, xword[rip + c4]);
|
||||
}
|
||||
|
||||
mulss(src1_xmm, scratch1);
|
||||
L(retLabel);
|
||||
|
||||
ret();
|
||||
return subroutine;
|
||||
}
|
||||
|
||||
// As we mentioned above, this function is uber slow because we don't expect the shader JIT to call HLL functions in real scenarios
|
||||
// Aside from debugging code. So we don't care for this function to be performant or anything of the like. It is quick and dirty
|
||||
// And mostly meant to be used for generating logs to diff the JIT and interpreter
|
||||
|
|
|
@ -2,19 +2,45 @@
|
|||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cstdio>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
#include "renderer_null/renderer_null.hpp"
|
||||
#ifdef PANDA3DS_ENABLE_OPENGL
|
||||
#include "renderer_gl/renderer_gl.hpp"
|
||||
#endif
|
||||
|
||||
using namespace Floats;
|
||||
|
||||
// Note: For when we have multiple backends, the GL state manager can stay here and have the constructor for the Vulkan-or-whatever renderer ignore it
|
||||
// Thus, our GLStateManager being here does not negatively impact renderer-agnosticness
|
||||
GPU::GPU(Memory& mem, GLStateManager& gl, EmulatorConfig& config) : mem(mem), renderer(*this, gl, regs), config(config) {
|
||||
GPU::GPU(Memory& mem, EmulatorConfig& config) : mem(mem), config(config) {
|
||||
vram = new u8[vramSize];
|
||||
mem.setVRAM(vram); // Give the bus a pointer to our VRAM
|
||||
mem.setVRAM(vram); // Give the bus a pointer to our VRAM
|
||||
|
||||
switch (config.rendererType) {
|
||||
case RendererType::Null: {
|
||||
renderer.reset(new RendererNull(*this, regs));
|
||||
break;
|
||||
}
|
||||
#ifdef PANDA3DS_ENABLE_OPENGL
|
||||
case RendererType::OpenGL: {
|
||||
renderer.reset(new RendererGL(*this, regs));
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
case RendererType::Vulkan: {
|
||||
Helpers::panic("Vulkan is not supported yet, please pick another renderer");
|
||||
}
|
||||
|
||||
default: {
|
||||
Helpers::panic("Rendering backend not supported: %s", Renderer::typeToString(config.rendererType));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPU::reset() {
|
||||
|
@ -41,7 +67,7 @@ void GPU::reset() {
|
|||
e.config2 = 0;
|
||||
}
|
||||
|
||||
renderer.reset();
|
||||
renderer->reset();
|
||||
}
|
||||
|
||||
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
|
||||
|
@ -73,15 +99,14 @@ void GPU::drawArrays() {
|
|||
// Base address for vertex attributes
|
||||
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
|
||||
const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
|
||||
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg]; // Total # of vertices to transfer
|
||||
|
||||
// Configures the type of primitive and the number of vertex shader outputs
|
||||
const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig];
|
||||
const PICA::PrimType primType = static_cast<PICA::PrimType>(Helpers::getBits<8, 2>(primConfig));
|
||||
if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize");
|
||||
|
||||
if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) ||
|
||||
(primType == PICA::PrimType::TriangleStrip && vertexCount < 3) ||
|
||||
if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) ||
|
||||
(primType == PICA::PrimType::TriangleFan && vertexCount < 3)) {
|
||||
Helpers::panic("Invalid vertex count for primitive. Type: %d, vert count: %d\n", primType, vertexCount);
|
||||
}
|
||||
|
@ -89,10 +114,10 @@ void GPU::drawArrays() {
|
|||
// Get the configuration for the index buffer, used only for indexed drawing
|
||||
u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig];
|
||||
u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff);
|
||||
bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit
|
||||
bool shortIndex = Helpers::getBit<31>(indexBufferConfig); // Indicates whether vert indices are 16-bit or 8-bit
|
||||
|
||||
// Stuff the global attribute config registers in one u64 to make attr parsing easier
|
||||
// TODO: Cache this when the vertex attribute format registers are written to
|
||||
// TODO: Cache this when the vertex attribute format registers are written to
|
||||
u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
|
||||
|
||||
if constexpr (!indexed) {
|
||||
|
@ -111,24 +136,24 @@ void GPU::drawArrays() {
|
|||
constexpr size_t vertexCacheSize = 64;
|
||||
|
||||
struct {
|
||||
std::bitset<vertexCacheSize> validBits{0}; // Shows which tags are valid. If the corresponding bit is 1, then there's an entry
|
||||
std::array<u32, vertexCacheSize> ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer)
|
||||
std::array<u32, vertexCacheSize> bufferPositions; // Positions of the cached vertices in our own vertex buffer
|
||||
std::bitset<vertexCacheSize> validBits{0}; // Shows which tags are valid. If the corresponding bit is 1, then there's an entry
|
||||
std::array<u32, vertexCacheSize> ids; // IDs (ie indices of the cached vertices in the 3DS vertex buffer)
|
||||
std::array<u32, vertexCacheSize> bufferPositions; // Positions of the cached vertices in our own vertex buffer
|
||||
} vertexCache;
|
||||
|
||||
|
||||
for (u32 i = 0; i < vertexCount; i++) {
|
||||
u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering
|
||||
u32 vertexIndex; // Index of the vertex in the VBO for indexed rendering
|
||||
|
||||
if constexpr (!indexed) {
|
||||
vertexIndex = i + regs[PICA::InternalRegs::VertexOffsetReg];
|
||||
} else {
|
||||
if (shortIndex) {
|
||||
auto ptr = getPointerPhys<u16>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is very unsafe
|
||||
vertexIndex = *ptr; // TODO: This is very unsafe
|
||||
indexBufferPointer += 2;
|
||||
} else {
|
||||
auto ptr = getPointerPhys<u8>(indexBufferPointer);
|
||||
vertexIndex = *ptr; // TODO: This is also very unsafe
|
||||
vertexIndex = *ptr; // TODO: This is also very unsafe
|
||||
indexBufferPointer += 1;
|
||||
}
|
||||
}
|
||||
|
@ -152,22 +177,22 @@ void GPU::drawArrays() {
|
|||
}
|
||||
|
||||
int attrCount = 0;
|
||||
int buffer = 0; // Vertex buffer index for non-fixed attributes
|
||||
int buffer = 0; // Vertex buffer index for non-fixed attributes
|
||||
|
||||
while (attrCount < totalAttribCount) {
|
||||
// Check if attribute is fixed or not
|
||||
if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute
|
||||
vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works?
|
||||
if (fixedAttribMask & (1 << attrCount)) { // Fixed attribute
|
||||
vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[attrCount]; // TODO: Is this how it works?
|
||||
vec4f& inputAttr = currentAttributes[attrCount];
|
||||
std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr
|
||||
std::memcpy(&inputAttr, &fixedAttr, sizeof(vec4f)); // Copy fixed attr to input attr
|
||||
attrCount++;
|
||||
} else { // Non-fixed attribute
|
||||
auto& attr = attributeInfo[buffer]; // Get information for this attribute
|
||||
u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32)
|
||||
} else { // Non-fixed attribute
|
||||
auto& attr = attributeInfo[buffer]; // Get information for this attribute
|
||||
u64 attrCfg = attr.getConfigFull(); // Get config1 | (config2 << 32)
|
||||
u32 attrAddress = vertexBase + attr.offset + (vertexIndex * attr.size);
|
||||
|
||||
for (int j = 0; j < attr.componentCount; j++) {
|
||||
uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg
|
||||
uint index = (attrCfg >> (j * 4)) & 0xf; // Get index of attribute in vertexCfg
|
||||
|
||||
// Vertex attributes used as padding
|
||||
// 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively
|
||||
|
@ -179,15 +204,15 @@ void GPU::drawArrays() {
|
|||
}
|
||||
|
||||
u32 attribInfo = (vertexCfg >> (index * 4)) & 0xf;
|
||||
u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float)
|
||||
u32 size = (attribInfo >> 2) + 1; // Total number of components
|
||||
u32 attribType = attribInfo & 0x3; // Type of attribute(sbyte/ubyte/short/float)
|
||||
u32 size = (attribInfo >> 2) + 1; // Total number of components
|
||||
|
||||
//printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size);
|
||||
// printf("vertex_attribute_strides[%d] = %d\n", attrCount, attr.size);
|
||||
vec4f& attribute = currentAttributes[attrCount];
|
||||
uint component; // Current component
|
||||
uint component; // Current component
|
||||
|
||||
switch (attribType) {
|
||||
case 0: { // Signed byte
|
||||
case 0: { // Signed byte
|
||||
s8* ptr = getPointerPhys<s8>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
|
@ -197,7 +222,7 @@ void GPU::drawArrays() {
|
|||
break;
|
||||
}
|
||||
|
||||
case 1: { // Unsigned byte
|
||||
case 1: { // Unsigned byte
|
||||
u8* ptr = getPointerPhys<u8>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
|
@ -207,7 +232,7 @@ void GPU::drawArrays() {
|
|||
break;
|
||||
}
|
||||
|
||||
case 2: { // Short
|
||||
case 2: { // Short
|
||||
s16* ptr = getPointerPhys<s16>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = static_cast<float>(*ptr++);
|
||||
|
@ -217,7 +242,7 @@ void GPU::drawArrays() {
|
|||
break;
|
||||
}
|
||||
|
||||
case 3: { // Float
|
||||
case 3: { // Float
|
||||
float* ptr = getPointerPhys<float>(attrAddress);
|
||||
for (component = 0; component < size; component++) {
|
||||
float val = *ptr++;
|
||||
|
@ -251,8 +276,8 @@ void GPU::drawArrays() {
|
|||
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
|
||||
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
|
||||
}
|
||||
|
||||
if constexpr (useShaderJIT) {
|
||||
|
||||
if constexpr (useShaderJIT) {
|
||||
shaderJIT.run(shaderUnit.vs);
|
||||
} else {
|
||||
shaderUnit.vs.run();
|
||||
|
@ -264,14 +289,14 @@ void GPU::drawArrays() {
|
|||
for (int i = 0; i < totalShaderOutputs; i++) {
|
||||
const u32 config = regs[PICA::InternalRegs::ShaderOutmap0 + i];
|
||||
|
||||
for (int j = 0; j < 4; j++) { // pls unroll
|
||||
for (int j = 0; j < 4; j++) { // pls unroll
|
||||
const u32 mapping = (config >> (j * 8)) & 0x1F;
|
||||
out.raw[mapping] = shaderUnit.vs.outputs[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
renderer.drawVertices(primType, std::span(vertices).first(vertexCount));
|
||||
renderer->drawVertices(primType, std::span(vertices).first(vertexCount));
|
||||
}
|
||||
|
||||
PICA::Vertex GPU::getImmediateModeVertex() {
|
||||
|
@ -289,7 +314,9 @@ PICA::Vertex GPU::getImmediateModeVertex() {
|
|||
std::memcpy(&v.s.colour, &shaderUnit.vs.outputs[1], sizeof(vec4f));
|
||||
std::memcpy(&v.s.texcoord0, &shaderUnit.vs.outputs[2], 2 * sizeof(f24));
|
||||
|
||||
printf("(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]);
|
||||
printf(
|
||||
"(x, y, z, w) = (%f, %f, %f, %f)\n", (double)v.s.positions[0], (double)v.s.positions[1], (double)v.s.positions[2], (double)v.s.positions[3]
|
||||
);
|
||||
printf("(r, g, b, a) = (%f, %f, %f, %f)\n", (double)v.s.colour[0], (double)v.s.colour[1], (double)v.s.colour[2], (double)v.s.colour[3]);
|
||||
printf("(u, v ) = (%f, %f)\n", (double)v.s.texcoord0[0], (double)v.s.texcoord0[1]);
|
||||
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
#include "PICA/gpu.hpp"
|
||||
|
||||
using namespace Floats;
|
||||
using namespace Helpers;
|
||||
|
||||
u32 GPU::readReg(u32 address) {
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
return readInternalReg(index);
|
||||
} else {
|
||||
|
@ -15,7 +16,7 @@ u32 GPU::readReg(u32 address) {
|
|||
}
|
||||
|
||||
void GPU::writeReg(u32 address, u32 value) {
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
if (address >= 0x1EF01000 && address < 0x1EF01C00) { // Internal registers
|
||||
const u32 index = (address - 0x1EF01000) / sizeof(u32);
|
||||
writeInternalReg(index, value, 0xffffffff);
|
||||
} else {
|
||||
|
@ -59,7 +60,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
|
||||
u32 currentValue = regs[index];
|
||||
u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask"
|
||||
u32 newValue = (currentValue & ~mask) | (value & mask); // Only overwrite the bits specified by "mask"
|
||||
regs[index] = newValue;
|
||||
|
||||
// TODO: Figure out if things like the shader index use the unmasked value or the masked one
|
||||
|
@ -74,38 +75,38 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
break;
|
||||
|
||||
case AttribFormatHigh:
|
||||
totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes
|
||||
fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices
|
||||
totalAttribCount = (value >> 28) + 1; // Total number of vertex attributes
|
||||
fixedAttribMask = getBits<16, 12>(value); // Determines which vertex attributes are fixed for all vertices
|
||||
break;
|
||||
|
||||
case ColourBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setColourBufferLoc(loc);
|
||||
renderer->setColourBufferLoc(loc);
|
||||
break;
|
||||
};
|
||||
|
||||
case ColourBufferFormat: {
|
||||
u32 format = getBits<16, 3>(value);
|
||||
renderer.setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||
renderer->setColourFormat(static_cast<PICA::ColorFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
case DepthBufferLoc: {
|
||||
u32 loc = (value & 0x0fffffff) << 3;
|
||||
renderer.setDepthBufferLoc(loc);
|
||||
renderer->setDepthBufferLoc(loc);
|
||||
break;
|
||||
}
|
||||
|
||||
case DepthBufferFormat: {
|
||||
u32 format = value & 0x3;
|
||||
renderer.setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||
renderer->setDepthFormat(static_cast<PICA::DepthFmt>(format));
|
||||
break;
|
||||
}
|
||||
|
||||
case FramebufferSize: {
|
||||
const u32 width = value & 0x7ff;
|
||||
const u32 height = getBits<12, 10>(value) + 1;
|
||||
renderer.setFBSize(width, height);
|
||||
renderer->setFBSize(width, height);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -116,7 +117,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
case LightingLUTData4:
|
||||
case LightingLUTData5:
|
||||
case LightingLUTData6:
|
||||
case LightingLUTData7:{
|
||||
case LightingLUTData7: {
|
||||
const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register
|
||||
const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to
|
||||
uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to
|
||||
|
@ -133,15 +134,22 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
break;
|
||||
}
|
||||
|
||||
case VertexFloatUniformIndex:
|
||||
case VertexFloatUniformIndex: {
|
||||
shaderUnit.vs.setFloatUniformIndex(value);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexFloatUniformData0: case VertexFloatUniformData1: case VertexFloatUniformData2:
|
||||
case VertexFloatUniformData3: case VertexFloatUniformData4: case VertexFloatUniformData5:
|
||||
case VertexFloatUniformData6: case VertexFloatUniformData7:
|
||||
case VertexFloatUniformData0:
|
||||
case VertexFloatUniformData1:
|
||||
case VertexFloatUniformData2:
|
||||
case VertexFloatUniformData3:
|
||||
case VertexFloatUniformData4:
|
||||
case VertexFloatUniformData5:
|
||||
case VertexFloatUniformData6:
|
||||
case VertexFloatUniformData7: {
|
||||
shaderUnit.vs.uploadFloatUniform(value);
|
||||
break;
|
||||
}
|
||||
|
||||
case FixedAttribIndex:
|
||||
fixedAttribCount = 0;
|
||||
|
@ -162,7 +170,9 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
break;
|
||||
|
||||
case FixedAttribData0: case FixedAttribData1: case FixedAttribData2:
|
||||
case FixedAttribData0:
|
||||
case FixedAttribData1:
|
||||
case FixedAttribData2:
|
||||
fixedAttrBuff[fixedAttribCount++] = value;
|
||||
|
||||
if (fixedAttribCount == 3) {
|
||||
|
@ -170,15 +180,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
|
||||
vec4f attr;
|
||||
// These are stored in the reverse order anyone would expect them to be in
|
||||
attr.x() = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
||||
attr.y() = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
||||
attr.z() = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
||||
attr.w() = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
||||
attr[0] = f24::fromRaw(fixedAttrBuff[2] & 0xffffff);
|
||||
attr[1] = f24::fromRaw(((fixedAttrBuff[1] & 0xffff) << 8) | (fixedAttrBuff[2] >> 24));
|
||||
attr[2] = f24::fromRaw(((fixedAttrBuff[0] & 0xff) << 16) | (fixedAttrBuff[1] >> 16));
|
||||
attr[3] = f24::fromRaw(fixedAttrBuff[0] >> 8);
|
||||
|
||||
// If the fixed attribute index is < 12, we're just writing to one of the fixed attributes
|
||||
if (fixedAttribIndex < 12) [[likely]] {
|
||||
shaderUnit.vs.fixedAttributes[fixedAttribIndex++] = attr;
|
||||
} else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex
|
||||
} else if (fixedAttribIndex == 15) { // Otherwise if it's 15, we're submitting an immediate mode vertex
|
||||
const uint totalAttrCount = (regs[PICA::InternalRegs::VertexShaderAttrNum] & 0xf) + 1;
|
||||
if (totalAttrCount <= immediateModeAttrIndex) {
|
||||
printf("Broken state in the immediate mode vertex submission pipeline. Failing silently\n");
|
||||
|
@ -199,13 +209,15 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
// If we've reached 3 verts, issue a draw call
|
||||
// Handle rendering depending on the primitive type
|
||||
if (immediateModeVertIndex == 3) {
|
||||
renderer.drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
|
||||
|
||||
switch (primType) {
|
||||
// Triangle or geometry primitive. Draw a triangle and discard all vertices
|
||||
case 0: case 3:
|
||||
case 0:
|
||||
case 3: {
|
||||
immediateModeVertIndex = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// Triangle strip. Draw triangle, discard first vertex and keep the last 2
|
||||
case 1:
|
||||
|
@ -223,54 +235,72 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see
|
||||
} else { // Writing to fixed attributes 13 and 14 probably does nothing, but we'll see
|
||||
log("Wrote to invalid fixed vertex attribute %d\n", fixedAttribIndex);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case VertexShaderOpDescriptorIndex:
|
||||
case VertexShaderOpDescriptorIndex: {
|
||||
shaderUnit.vs.setOpDescriptorIndex(value);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexShaderOpDescriptorData0: case VertexShaderOpDescriptorData1: case VertexShaderOpDescriptorData2:
|
||||
case VertexShaderOpDescriptorData3: case VertexShaderOpDescriptorData4: case VertexShaderOpDescriptorData5:
|
||||
case VertexShaderOpDescriptorData6: case VertexShaderOpDescriptorData7:
|
||||
case VertexShaderOpDescriptorData0:
|
||||
case VertexShaderOpDescriptorData1:
|
||||
case VertexShaderOpDescriptorData2:
|
||||
case VertexShaderOpDescriptorData3:
|
||||
case VertexShaderOpDescriptorData4:
|
||||
case VertexShaderOpDescriptorData5:
|
||||
case VertexShaderOpDescriptorData6:
|
||||
case VertexShaderOpDescriptorData7: {
|
||||
shaderUnit.vs.uploadDescriptor(value);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexBoolUniform:
|
||||
case VertexBoolUniform: {
|
||||
shaderUnit.vs.boolUniform = value & 0xffff;
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexIntUniform0: case VertexIntUniform1: case VertexIntUniform2: case VertexIntUniform3:
|
||||
case VertexIntUniform0:
|
||||
case VertexIntUniform1:
|
||||
case VertexIntUniform2:
|
||||
case VertexIntUniform3: {
|
||||
shaderUnit.vs.uploadIntUniform(index - VertexIntUniform0, value);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexShaderData0: case VertexShaderData1: case VertexShaderData2: case VertexShaderData3:
|
||||
case VertexShaderData4: case VertexShaderData5: case VertexShaderData6: case VertexShaderData7:
|
||||
case VertexShaderData0:
|
||||
case VertexShaderData1:
|
||||
case VertexShaderData2:
|
||||
case VertexShaderData3:
|
||||
case VertexShaderData4:
|
||||
case VertexShaderData5:
|
||||
case VertexShaderData6:
|
||||
case VertexShaderData7: {
|
||||
shaderUnit.vs.uploadWord(value);
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexShaderEntrypoint:
|
||||
case VertexShaderEntrypoint: {
|
||||
shaderUnit.vs.entrypoint = value & 0xffff;
|
||||
break;
|
||||
}
|
||||
|
||||
case VertexShaderTransferEnd:
|
||||
if (value != 0) shaderUnit.vs.finalize();
|
||||
break;
|
||||
|
||||
case VertexShaderTransferIndex:
|
||||
shaderUnit.vs.setBufferIndex(value);
|
||||
break;
|
||||
case VertexShaderTransferIndex: shaderUnit.vs.setBufferIndex(value); break;
|
||||
|
||||
// Command lists can write to the command processor registers and change the command list stream
|
||||
// Several games are known to do this, including New Super Mario Bros 2 and Super Mario 3D Land
|
||||
case CmdBufTrigger0:
|
||||
case CmdBufTrigger1: {
|
||||
if (value != 0) { // A non-zero value triggers command list processing
|
||||
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
|
||||
if (value != 0) { // A non-zero value triggers command list processing
|
||||
int bufferIndex = index - CmdBufTrigger0; // Index of the command buffer to execute (0 or 1)
|
||||
u32 addr = (regs[CmdBufAddr0 + bufferIndex] & 0xfffffff) << 3;
|
||||
u32 size = (regs[CmdBufSize0 + bufferIndex] & 0xfffff) << 3;
|
||||
|
||||
|
@ -285,15 +315,13 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
|||
default:
|
||||
// Vertex attribute registers
|
||||
if (index >= AttribInfoStart && index <= AttribInfoEnd) {
|
||||
uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to
|
||||
uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to?
|
||||
uint attributeIndex = (index - AttribInfoStart) / 3; // Which attribute are we writing to
|
||||
uint reg = (index - AttribInfoStart) % 3; // Which of this attribute's registers are we writing to?
|
||||
auto& attr = attributeInfo[attributeIndex];
|
||||
|
||||
switch (reg) {
|
||||
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
||||
case 1:
|
||||
attr.config1 = value;
|
||||
break;
|
||||
case 0: attr.offset = value & 0xfffffff; break; // Attribute offset
|
||||
case 1: attr.config1 = value; break;
|
||||
case 2:
|
||||
attr.config2 = value;
|
||||
attr.size = getBits<16, 8>(value);
|
||||
|
@ -339,13 +367,13 @@ void GPU::startCommandList(u32 addr, u32 size) {
|
|||
|
||||
u32 id = header & 0xffff;
|
||||
u32 paramMaskIndex = getBits<16, 4>(header);
|
||||
u32 paramCount = getBits<20, 8>(header); // Number of additional parameters
|
||||
u32 paramCount = getBits<20, 8>(header); // Number of additional parameters
|
||||
// Bit 31 tells us whether this command is going to write to multiple sequential registers (if the bit is 1)
|
||||
// Or if all written values will go to the same register (If the bit is 0). It's essentially the value that
|
||||
// gets added to the "id" field after each register write
|
||||
bool consecutiveWritingMode = (header >> 31) != 0;
|
||||
|
||||
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
|
||||
u32 mask = maskLUT[paramMaskIndex]; // Actual parameter mask
|
||||
// Increment the ID by 1 after each write if we're in consecutive mode, or 0 otherwise
|
||||
u32 idIncrement = (consecutiveWritingMode) ? 1 : 0;
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#include "PICA/shader.hpp"
|
||||
#include <cmath>
|
||||
|
||||
#include "PICA/shader.hpp"
|
||||
|
||||
using namespace Helpers;
|
||||
|
||||
void PICAShader::run() {
|
||||
|
@ -11,20 +12,23 @@ void PICAShader::run() {
|
|||
|
||||
while (true) {
|
||||
const u32 instruction = loadedShader[pc++];
|
||||
const u32 opcode = instruction >> 26; // Top 6 bits are the opcode
|
||||
const u32 opcode = instruction >> 26; // Top 6 bits are the opcode
|
||||
|
||||
switch (opcode) {
|
||||
case ShaderOpcodes::ADD: add(instruction); break;
|
||||
case ShaderOpcodes::CALL: call(instruction); break;
|
||||
case ShaderOpcodes::CALLC: callc(instruction); break;
|
||||
case ShaderOpcodes::CALLU: callu(instruction); break;
|
||||
case ShaderOpcodes::CMP1: case ShaderOpcodes::CMP2:
|
||||
case ShaderOpcodes::CMP1:
|
||||
case ShaderOpcodes::CMP2: {
|
||||
cmp(instruction);
|
||||
break;
|
||||
}
|
||||
|
||||
case ShaderOpcodes::DP3: dp3(instruction); break;
|
||||
case ShaderOpcodes::DP4: dp4(instruction); break;
|
||||
case ShaderOpcodes::DPHI: dphi(instruction); break;
|
||||
case ShaderOpcodes::END: return; // Stop running shader
|
||||
case ShaderOpcodes::END: return; // Stop running shader
|
||||
case ShaderOpcodes::EX2: ex2(instruction); break;
|
||||
case ShaderOpcodes::FLR: flr(instruction); break;
|
||||
case ShaderOpcodes::IFC: ifc(instruction); break;
|
||||
|
@ -38,31 +42,47 @@ void PICAShader::run() {
|
|||
case ShaderOpcodes::MOV: mov(instruction); break;
|
||||
case ShaderOpcodes::MOVA: mova(instruction); break;
|
||||
case ShaderOpcodes::MUL: mul(instruction); break;
|
||||
case ShaderOpcodes::NOP: break; // Do nothing
|
||||
case ShaderOpcodes::NOP: break; // Do nothing
|
||||
case ShaderOpcodes::RCP: rcp(instruction); break;
|
||||
case ShaderOpcodes::RSQ: rsq(instruction); break;
|
||||
case ShaderOpcodes::SGEI: sgei(instruction); break;
|
||||
case ShaderOpcodes::SLT: slt(instruction); break;
|
||||
case ShaderOpcodes::SLTI: slti(instruction); break;
|
||||
|
||||
case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
|
||||
case 0x30:
|
||||
case 0x31:
|
||||
case 0x32:
|
||||
case 0x33:
|
||||
case 0x34:
|
||||
case 0x35:
|
||||
case 0x36:
|
||||
case 0x37: {
|
||||
madi(instruction);
|
||||
break;
|
||||
}
|
||||
|
||||
case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
|
||||
case 0x38:
|
||||
case 0x39:
|
||||
case 0x3A:
|
||||
case 0x3B:
|
||||
case 0x3C:
|
||||
case 0x3D:
|
||||
case 0x3E:
|
||||
case 0x3F: {
|
||||
mad(instruction);
|
||||
break;
|
||||
}
|
||||
|
||||
default:Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||
default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
|
||||
}
|
||||
|
||||
// Handle control flow statements. The ordering is important as the priority goes: LOOP > IF > CALL
|
||||
// Handle loop
|
||||
if (loopIndex != 0) {
|
||||
auto& loop = loopInfo[loopIndex - 1];
|
||||
if (pc == loop.endingPC) { // Check if the loop needs to start over
|
||||
if (pc == loop.endingPC) { // Check if the loop needs to start over
|
||||
loop.iterations -= 1;
|
||||
if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack
|
||||
if (loop.iterations == 0) // If the loop ended, go one level down on the loop stack
|
||||
loopIndex -= 1;
|
||||
|
||||
loopCounter += loop.increment;
|
||||
|
@ -73,7 +93,7 @@ void PICAShader::run() {
|
|||
// Handle ifs
|
||||
if (ifIndex != 0) {
|
||||
auto& info = conditionalInfo[ifIndex - 1];
|
||||
if (pc == info.endingPC) { // Check if the IF block ended
|
||||
if (pc == info.endingPC) { // Check if the IF block ended
|
||||
pc = info.newPC;
|
||||
ifIndex -= 1;
|
||||
}
|
||||
|
@ -82,7 +102,7 @@ void PICAShader::run() {
|
|||
// Handle calls
|
||||
if (callIndex != 0) {
|
||||
auto& info = callInfo[callIndex - 1];
|
||||
if (pc == info.endingPC) { // Check if the CALL block ended
|
||||
if (pc == info.endingPC) { // Check if the CALL block ended
|
||||
pc = info.returnPC;
|
||||
callIndex -= 1;
|
||||
}
|
||||
|
@ -92,15 +112,15 @@ void PICAShader::run() {
|
|||
|
||||
// Calculate the actual source value using an instruction's source field and its respective index value
|
||||
// The index value is used to apply relative addressing when index != 0 by adding one of the 3 addr registers to the
|
||||
// source field, but only when the original source field is pointing at a vector uniform register
|
||||
// source field, but only when the original source field is pointing at a vector uniform register
|
||||
u8 PICAShader::getIndexedSource(u32 source, u32 index) {
|
||||
if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg
|
||||
if (source < 0x20) // No offset is applied if the source isn't pointing to a vector uniform reg
|
||||
return source;
|
||||
|
||||
switch (index) {
|
||||
case 0: [[likely]] return u8(source); // No offset applied
|
||||
case 1: return u8(source + addrRegister.x());
|
||||
case 2: return u8(source + addrRegister.y());
|
||||
case 0: [[likely]] return u8(source); // No offset applied
|
||||
case 1: return u8(source + addrRegister[0]);
|
||||
case 2: return u8(source + addrRegister[1]);
|
||||
case 3: return u8(source + loopCounter);
|
||||
}
|
||||
|
||||
|
@ -117,7 +137,7 @@ PICAShader::vec4f PICAShader::getSource(u32 source) {
|
|||
return floatUniforms[source - 0x20];
|
||||
else {
|
||||
Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
|
||||
return vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
||||
return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,13 +156,13 @@ bool PICAShader::isCondTrue(u32 instruction) {
|
|||
bool refX = (getBit<25>(instruction)) != 0;
|
||||
|
||||
switch (condition) {
|
||||
case 0: // Either cmp register matches
|
||||
case 0: // Either cmp register matches
|
||||
return cmpRegister[0] == refX || cmpRegister[1] == refY;
|
||||
case 1: // Both cmp registers match
|
||||
case 1: // Both cmp registers match
|
||||
return cmpRegister[0] == refX && cmpRegister[1] == refY;
|
||||
case 2: // At least cmp.x matches
|
||||
case 2: // At least cmp.x matches
|
||||
return cmpRegister[0] == refX;
|
||||
default: // At least cmp.y matches
|
||||
default: // At least cmp.y matches
|
||||
return cmpRegister[1] == refY;
|
||||
}
|
||||
}
|
||||
|
@ -150,7 +170,7 @@ bool PICAShader::isCondTrue(u32 instruction) {
|
|||
void PICAShader::add(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -171,7 +191,7 @@ void PICAShader::add(u32 instruction) {
|
|||
void PICAShader::mul(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -210,7 +230,7 @@ void PICAShader::flr(u32 instruction) {
|
|||
void PICAShader::max(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -232,7 +252,7 @@ void PICAShader::max(u32 instruction) {
|
|||
void PICAShader::min(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -278,16 +298,16 @@ void PICAShader::mova(u32 instruction) {
|
|||
vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor);
|
||||
|
||||
u32 componentMask = operandDescriptor & 0xf;
|
||||
if (componentMask & 0b1000) // x component
|
||||
addrRegister.x() = static_cast<s32>(srcVector.x().toFloat32());
|
||||
if (componentMask & 0b0100) // y component
|
||||
addrRegister.y() = static_cast<s32>(srcVector.y().toFloat32());
|
||||
if (componentMask & 0b1000) // x component
|
||||
addrRegister[0] = static_cast<s32>(srcVector[0].toFloat32());
|
||||
if (componentMask & 0b0100) // y component
|
||||
addrRegister[1] = static_cast<s32>(srcVector[1].toFloat32());
|
||||
}
|
||||
|
||||
void PICAShader::dp3(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -309,7 +329,7 @@ void PICAShader::dp3(u32 instruction) {
|
|||
void PICAShader::dp4(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -480,7 +500,7 @@ void PICAShader::madi(u32 instruction) {
|
|||
void PICAShader::slt(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
|
@ -542,11 +562,11 @@ void PICAShader::slti(u32 instruction) {
|
|||
void PICAShader::cmp(u32 instruction) {
|
||||
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 cmpY = getBits<21, 3>(instruction);
|
||||
const u32 cmpX = getBits<24, 3>(instruction);
|
||||
const u32 cmpOperations[2] = { cmpX, cmpY };
|
||||
const u32 cmpOperations[2] = {cmpX, cmpY};
|
||||
|
||||
if (idx) Helpers::panic("[PICA] CMP: idx != 0");
|
||||
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
||||
|
@ -554,33 +574,34 @@ void PICAShader::cmp(u32 instruction) {
|
|||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
switch (cmpOperations[i]) {
|
||||
case 0: // Equal
|
||||
case 0: // Equal
|
||||
cmpRegister[i] = srcVec1[i] == srcVec2[i];
|
||||
break;
|
||||
|
||||
case 1: // Not equal
|
||||
case 1: // Not equal
|
||||
cmpRegister[i] = srcVec1[i] != srcVec2[i];
|
||||
break;
|
||||
|
||||
case 2: // Less than
|
||||
case 2: // Less than
|
||||
cmpRegister[i] = srcVec1[i] < srcVec2[i];
|
||||
break;
|
||||
|
||||
case 3: // Less than or equal
|
||||
case 3: // Less than or equal
|
||||
cmpRegister[i] = srcVec1[i] <= srcVec2[i];
|
||||
break;
|
||||
|
||||
case 4: // Greater than
|
||||
case 4: // Greater than
|
||||
cmpRegister[i] = srcVec1[i] > srcVec2[i];
|
||||
break;
|
||||
|
||||
case 5: // Greater than or equal
|
||||
case 5: // Greater than or equal
|
||||
cmpRegister[i] = srcVec1[i] >= srcVec2[i];
|
||||
break;
|
||||
|
||||
default:
|
||||
default: {
|
||||
cmpRegister[i] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -604,7 +625,7 @@ void PICAShader::ifc(u32 instruction) {
|
|||
|
||||
void PICAShader::ifu(u32 instruction) {
|
||||
const u32 dest = getBits<10, 12>(instruction);
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
|
||||
if (boolUniform & (1 << bit)) {
|
||||
if (ifIndex >= 8) [[unlikely]]
|
||||
|
@ -615,8 +636,7 @@ void PICAShader::ifu(u32 instruction) {
|
|||
auto& block = conditionalInfo[ifIndex++];
|
||||
block.endingPC = dest;
|
||||
block.newPC = dest + num;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
pc = dest;
|
||||
}
|
||||
}
|
||||
|
@ -637,12 +657,12 @@ void PICAShader::call(u32 instruction) {
|
|||
|
||||
void PICAShader::callc(u32 instruction) {
|
||||
if (isCondTrue(instruction)) {
|
||||
call(instruction); // Pls inline
|
||||
call(instruction); // Pls inline
|
||||
}
|
||||
}
|
||||
|
||||
void PICAShader::callu(u32 instruction) {
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
|
||||
if (boolUniform & (1 << bit)) {
|
||||
if (callIndex >= 4) [[unlikely]]
|
||||
|
@ -664,26 +684,27 @@ void PICAShader::loop(u32 instruction) {
|
|||
Helpers::panic("[PICA] Overflowed loop stack");
|
||||
|
||||
u32 dest = getBits<10, 12>(instruction);
|
||||
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
||||
loopCounter = uniform.y();
|
||||
auto& uniform = intUniforms[getBits<22, 2>(instruction)]; // The uniform we'll get loop info from
|
||||
loopCounter = uniform[1];
|
||||
auto& loop = loopInfo[loopIndex++];
|
||||
|
||||
loop.startingPC = pc;
|
||||
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
||||
loop.iterations = uniform.x() + 1;
|
||||
loop.increment = uniform.z();
|
||||
loop.endingPC = dest + 1; // Loop is inclusive so we need + 1 here
|
||||
loop.iterations = uniform[0] + 1;
|
||||
loop.increment = uniform[2];
|
||||
}
|
||||
|
||||
void PICAShader::jmpc(u32 instruction) {
|
||||
if (isCondTrue(instruction))
|
||||
if (isCondTrue(instruction)) {
|
||||
pc = getBits<10, 12>(instruction);
|
||||
}
|
||||
}
|
||||
|
||||
void PICAShader::jmpu(u32 instruction) {
|
||||
const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we want to compare to true, otherwise compare to false
|
||||
const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we want to compare to true, otherwise compare to false
|
||||
const u32 dest = getBits<10, 12>(instruction);
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
|
||||
|
||||
if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want
|
||||
if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want
|
||||
pc = dest;
|
||||
}
|
|
@ -1,4 +1,5 @@
|
|||
#include "PICA/shader_unit.hpp"
|
||||
|
||||
#include "cityhash.hpp"
|
||||
|
||||
void ShaderUnit::reset() {
|
||||
|
@ -18,18 +19,18 @@ void PICAShader::reset() {
|
|||
opDescriptorIndex = 0;
|
||||
f32UniformTransfer = false;
|
||||
|
||||
const vec4f zero = vec4f({ f24::zero(), f24::zero(), f24::zero(), f24::zero() });
|
||||
const vec4f zero = vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
|
||||
inputs.fill(zero);
|
||||
floatUniforms.fill(zero);
|
||||
outputs.fill(zero);
|
||||
tempRegisters.fill(zero);
|
||||
|
||||
for (auto& e : intUniforms) {
|
||||
e.x() = e.y() = e.z() = e.w() = 0;
|
||||
e[0] = e[1] = e[2] = e[3] = 0;
|
||||
}
|
||||
|
||||
addrRegister.x() = 0;
|
||||
addrRegister.y() = 0;
|
||||
addrRegister[0] = 0;
|
||||
addrRegister[1] = 0;
|
||||
loopCounter = 0;
|
||||
|
||||
codeHashDirty = true;
|
||||
|
|
210
src/core/action_replay.cpp
Normal file
210
src/core/action_replay.cpp
Normal file
|
@ -0,0 +1,210 @@
|
|||
#include "action_replay.hpp"
|
||||
|
||||
// Initialises the mem and hid members from the arguments, then puts the interpreter in its default state
ActionReplay::ActionReplay(Memory& mem, HIDService& hid)
	: mem(mem), hid(hid) {
	reset();
}
|
||||
|
||||
// Restores the state that persists across cheat runs: both storage registers and the active storage selector
void ActionReplay::reset() {
	// Storage registers default to 0
	storage1 = storage2 = 0;

	// TODO: Is the active storage persistent or not?
	activeStorage = &storage1;
}
|
||||
|
||||
void ActionReplay::runCheat(const Cheat& cheat) {
|
||||
// Set offset and data registers to 0 at the start of a cheat
|
||||
data1 = data2 = offset1 = offset2 = 0;
|
||||
pc = 0;
|
||||
ifStackIndex = 0;
|
||||
loopStackIndex = 0;
|
||||
running = true;
|
||||
|
||||
activeOffset = &offset1;
|
||||
activeData = &data1;
|
||||
|
||||
while (running) {
|
||||
// See if we can fetch 1 64-bit opcode, otherwise we're out of bounds. Cheats seem to end when going out of bounds?
|
||||
if (pc + 1 >= cheat.size()) {
|
||||
return;
|
||||
}
|
||||
// Fetch instruction
|
||||
const u32 instruction = cheat[pc++];
|
||||
|
||||
// Instructions D0000000 00000000 and D2000000 00000000 are unconditional
|
||||
bool isUnconditional = cheat[pc] == 0 && (instruction == 0xD0000000 || instruction == 0xD2000000);
|
||||
if (ifStackIndex > 0 && !isUnconditional && !ifStack[ifStackIndex - 1]) {
|
||||
pc++; // Eat up dummy word
|
||||
continue; // Skip conditional instructions where the condition is false
|
||||
}
|
||||
|
||||
runInstruction(cheat, instruction);
|
||||
}
|
||||
}
|
||||
|
||||
u8 ActionReplay::read8(u32 addr) { return mem.read8(addr); }
|
||||
u16 ActionReplay::read16(u32 addr) { return mem.read16(addr); }
|
||||
u32 ActionReplay::read32(u32 addr) { return mem.read32(addr); }
|
||||
|
||||
// Some AR cheats seem to want to write to unmapped memory or memory that straight up does not exist
|
||||
|
||||
#define MAKE_WRITE_HANDLER(size) \
|
||||
void ActionReplay::write##size(u32 addr, u##size value) { \
|
||||
auto pointerWrite = mem.getWritePointer(addr); \
|
||||
if (pointerWrite) { \
|
||||
*(u##size*)pointerWrite = value; \
|
||||
} else { \
|
||||
auto pointerRead = mem.getReadPointer(addr); \
|
||||
if (pointerRead) { \
|
||||
*(u##size*)pointerRead = value; \
|
||||
} else { \
|
||||
Helpers::warn("AR code tried to write to invalid address: %08X\n", addr); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
MAKE_WRITE_HANDLER(8)
|
||||
MAKE_WRITE_HANDLER(16)
|
||||
MAKE_WRITE_HANDLER(32)
|
||||
#undef MAKE_WRITE_HANDLER
|
||||
|
||||
void ActionReplay::runInstruction(const Cheat& cheat, u32 instruction) {
|
||||
// Top nibble determines the instruction type
|
||||
const u32 type = instruction >> 28;
|
||||
|
||||
switch (type) {
|
||||
// 32-bit write to [XXXXXXX + offset]
|
||||
case 0x0: {
|
||||
const u32 baseAddr = Helpers::getBits<0, 28>(instruction);
|
||||
const u32 value = cheat[pc++];
|
||||
write32(baseAddr + *activeOffset, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// 16-bit write to [XXXXXXX + offset]
|
||||
case 0x1: {
|
||||
const u32 baseAddr = Helpers::getBits<0, 28>(instruction);
|
||||
const u16 value = u16(cheat[pc++]);
|
||||
write16(baseAddr + *activeOffset, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// 8-bit write to [XXXXXXX + offset]
|
||||
case 0x2: {
|
||||
const u32 baseAddr = Helpers::getBits<0, 28>(instruction);
|
||||
const u8 value = u8(cheat[pc++]);
|
||||
write8(baseAddr + *activeOffset, value);
|
||||
break;
|
||||
}
|
||||
|
||||
// Less Than (YYYYYYYY < [XXXXXXX + offset])
|
||||
case 0x4: {
|
||||
const u32 baseAddr = Helpers::getBits<0, 28>(instruction);
|
||||
const u32 imm = cheat[pc++];
|
||||
const u32 value = read32(baseAddr + *activeOffset);
|
||||
Helpers::panic("TODO: How do ActionReplay conditional blocks work?");
|
||||
break;
|
||||
}
|
||||
|
||||
case 0xD: executeDType(cheat, instruction); break;
|
||||
default: Helpers::panic("Unimplemented ActionReplay instruction type %X", type); break;
|
||||
}
|
||||
}
|
||||
|
||||
void ActionReplay::executeDType(const Cheat& cheat, u32 instruction) {
|
||||
switch (instruction) {
|
||||
case 0xD3000000: offset1 = cheat[pc++]; break;
|
||||
case 0xD3000001: offset2 = cheat[pc++]; break;
|
||||
case 0xDC000000: *activeOffset += cheat[pc++]; break;
|
||||
|
||||
// DD000000 XXXXXXXX - if KEYPAD has value XXXXXXXX execute next block
|
||||
case 0xDD000000: {
|
||||
const u32 mask = cheat[pc++];
|
||||
const u32 buttons = hid.getOldButtons();
|
||||
|
||||
pushConditionBlock((buttons & mask) == mask);
|
||||
break;
|
||||
}
|
||||
|
||||
// Offset register ops
|
||||
case 0xDF000000: {
|
||||
const u32 subopcode = cheat[pc++];
|
||||
switch (subopcode) {
|
||||
case 0x00000000: activeOffset = &offset1; break;
|
||||
case 0x00000001: activeOffset = &offset2; break;
|
||||
case 0x00010000: offset2 = offset1; break;
|
||||
case 0x00010001: offset1 = offset2; break;
|
||||
case 0x00020000: data1 = offset1; break;
|
||||
case 0x00020001: data2 = offset2; break;
|
||||
default:
|
||||
Helpers::warn("Unknown ActionReplay offset operation");
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Data register operations
|
||||
case 0xDF000001: {
|
||||
const u32 subopcode = cheat[pc++];
|
||||
switch (subopcode) {
|
||||
case 0x00000000: activeData = &data1; break;
|
||||
case 0x00000001: activeData = &data2; break;
|
||||
|
||||
case 0x00010000: data2 = data1; break;
|
||||
case 0x00010001: data1 = data2; break;
|
||||
case 0x00020000: offset1 = data1; break;
|
||||
case 0x00020001: offset2 = data2; break;
|
||||
default:
|
||||
Helpers::warn("Unknown ActionReplay data operation");
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Storage register operations
|
||||
case 0xDF000002: {
|
||||
const u32 subopcode = cheat[pc++];
|
||||
switch (subopcode) {
|
||||
case 0x00000000: activeStorage = &storage1; break;
|
||||
case 0x00000001: activeStorage = &storage2; break;
|
||||
|
||||
case 0x00010000: data1 = storage1; break;
|
||||
case 0x00010001: data2 = storage2; break;
|
||||
case 0x00020000: storage1 = data1; break;
|
||||
case 0x00020001: storage2 = data2; break;
|
||||
default:
|
||||
Helpers::warn("Unknown ActionReplay data operation: %08X", subopcode);
|
||||
running = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Control flow block operations
|
||||
case 0xD2000000: {
|
||||
const u32 subopcode = cheat[pc++];
|
||||
switch (subopcode) {
|
||||
// Ends all loop/execute blocks
|
||||
case 0:
|
||||
loopStackIndex = 0;
|
||||
ifStackIndex = 0;
|
||||
break;
|
||||
default: Helpers::panic("Unknown ActionReplay control flow operation: %08X", subopcode); break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default: Helpers::panic("ActionReplay: Unimplemented d-type opcode: %08X", instruction); break;
|
||||
}
|
||||
}
|
||||
|
||||
void ActionReplay::pushConditionBlock(bool condition) {
|
||||
if (ifStackIndex >= 32) {
|
||||
Helpers::warn("ActionReplay if stack overflowed");
|
||||
running = false;
|
||||
return;
|
||||
}
|
||||
|
||||
ifStack[ifStackIndex++] = condition;
|
||||
}
|
28
src/core/cheats.cpp
Normal file
28
src/core/cheats.cpp
Normal file
|
@ -0,0 +1,28 @@
|
|||
#include "cheats.hpp"
|
||||
|
||||
// Builds the cheat manager: the ActionReplay engine is constructed from the memory and
// HID service references, after which reset() brings everything to a clean state.
Cheats::Cheats(Memory& mem, HIDService& hid)
	: ar(mem, hid)
{
	reset();
}
|
||||
|
||||
// Returns the cheat manager to its initial state.
void Cheats::reset() {
	// Drop every cheat that is currently loaded
	cheats.clear();

	// Reset the ActionReplay engine as well
	ar.reset();
}
|
||||
|
||||
// Appends a copy of the given cheat to the list of loaded cheats.
void Cheats::addCheat(const Cheat& cheat) {
	cheats.push_back(cheat);
}
|
||||
|
||||
void Cheats::run() {
|
||||
for (const Cheat& cheat : cheats) {
|
||||
switch (cheat.type) {
|
||||
case CheatType::ActionReplay: {
|
||||
ar.runCheat(cheat.instructions);
|
||||
break;
|
||||
}
|
||||
|
||||
case CheatType::Gateway: {
|
||||
Helpers::panic("Gateway cheats not supported yet! Only Action Replay is supported!");
|
||||
break;
|
||||
}
|
||||
|
||||
default: Helpers::panic("Unknown cheat type");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,3 +1,10 @@
|
|||
#include <array>
|
||||
#include <cctype>
|
||||
#include <filesystem>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "ipc.hpp"
|
||||
#include "kernel.hpp"
|
||||
|
||||
namespace DirectoryOps {
|
||||
|
@ -7,6 +14,79 @@ namespace DirectoryOps {
|
|||
};
|
||||
}
|
||||
|
||||
// Helper to convert std::string to an 8.3 filename to mimic how Directory::Read works
using ShortFilename = std::array<char, 9>;   // 8 name characters + null terminator
using ShortExtension = std::array<char, 4>;  // 3 extension characters + null terminator
using Filename83 = std::pair<ShortFilename, ShortExtension>;

// Converts a file name into its 8.3 (short) form.
// The input string should be the stem and extension together, not separately
// Eg something like "boop.png", "panda.txt", etc
//
// Returns {name, extension}. Both arrays are space-padded and null-terminated, letters are
// uppercased and forbidden characters are dropped. If the stem has more than 8 valid
// characters it is truncated to 6 characters followed by "~1".
Filename83 convertTo83(const std::string& path) {
	ShortFilename filename;
	ShortExtension extension;

	// Convert a character to add it to the 8.3 name
	// "Characters such as + are changed to the underscore _, and letters are put in uppercase"
	// For now we put letters in uppercase until we find out what is supposed to be converted to _ and so on
	// std::toupper requires a value representable as unsigned char, so cast first to avoid
	// undefined behaviour on negative (non-ASCII) chars
	auto convertCharacter = [](char c) { return static_cast<char>(std::toupper(static_cast<unsigned char>(c))); };

	// List of forbidden character for 8.3 filenames, from Citra
	// TODO: Use constexpr when C++20 support is solid
	const std::string forbiddenChars = ".\"/\\[]:;=, ";
	auto isForbidden = [&forbiddenChars](char c) { return forbiddenChars.find(c) != std::string::npos; };

	// By default space-initialize the whole name, append null terminator in the end for both the filename and extension
	filename.fill(' ');
	extension.fill(' ');
	filename.back() = '\0';
	extension.back() = '\0';

	// Find the position of the dot in the string
	auto dotPos = path.rfind('.');
	// Wikipedia: If a file name has no extension, a trailing . has no effect
	// Thus check if the last character is a dot and ignore it, preferring the previous dot if it exists
	if (!path.empty() && dotPos == path.size() - 1) {
		// rfind(ch, pos) includes pos itself in the search, so start one character earlier.
		// A dot at index 0 has nothing before it; treat that as "no dot" to avoid wrapping around
		dotPos = (dotPos == 0) ? std::string::npos : path.rfind('.', dotPos - 1);
	}

	// If dotPos is not npos we have a valid dot character, and as such an extension
	const bool haveExtension = dotPos != std::string::npos;
	const std::string::size_type stemLength = haveExtension ? dotPos : path.size();
	int validCharacterCount = 0;
	bool filenameTooBig = false;

	// Parse characters until we're done OR until we reach 9 characters, in which case according to Wikipedia we must truncate to 6 letters
	// And append ~1 in the end
	for (std::string::size_type i = 0; i < stemLength; i++) {
		const char c = path[i];

		// Character is forbidden, we must ignore it
		if (isForbidden(c)) {
			continue;
		}

		// We already have capped the amount of characters, thus our filename is too big
		if (validCharacterCount == 8) {
			filenameTooBig = true;
			break;
		}

		filename[validCharacterCount++] = convertCharacter(c);  // Append character to filename
	}

	// Truncate name to 6 characters and denote that it is too big
	// TODO: Wikipedia says we should also do this if the filename contains an invalid character, including spaces. Must test
	if (filenameTooBig) {
		filename[6] = '~';
		filename[7] = '1';
	}

	if (haveExtension) {
		int extensionLen = 0;

		// Copy up to 3 valid characters from after the dot to the extension.
		// Forbidden characters (including an ignored trailing dot) are skipped
		for (std::string::size_type i = dotPos + 1; i < path.size() && extensionLen < 3; i++) {
			const char c = path[i];
			if (isForbidden(c)) {
				continue;
			}

			extension[extensionLen++] = convertCharacter(c);
		}
	}

	return {filename, extension};
}
|
||||
|
||||
void Kernel::handleDirectoryOperation(u32 messagePointer, Handle directory) {
|
||||
const u32 cmd = mem.read32(messagePointer);
|
||||
switch (cmd) {
|
||||
|
@ -25,16 +105,77 @@ void Kernel::closeDirectory(u32 messagePointer, Handle directory) {
|
|||
}
|
||||
|
||||
p->getData<DirectorySession>()->isOpen = false;
|
||||
mem.write32(messagePointer, IPC::responseHeader(0x802, 1, 0));
|
||||
mem.write32(messagePointer + 4, Result::Success);
|
||||
}
|
||||
|
||||
|
||||
// Handles a Directory::Read request.
// Reads the requested entry count from messagePointer + 4 and the guest output pointer from
// messagePointer + 12, then writes up to that many directory entries (0x228 bytes each) to the
// output buffer, continuing from the session's currentEntry. The response carries the result
// code at messagePointer + 4 and the number of entries actually written at messagePointer + 8.
// Panics if the handle is not a directory or if the directory has no path on disk.
void Kernel::readDirectory(u32 messagePointer, Handle directory) {
	const u32 entryCount = mem.read32(messagePointer + 4);
	const u32 outPointer = mem.read32(messagePointer + 12);
	logFileIO("Directory::Read (handle = %X, entry count = %d, out pointer = %08X)\n", directory, entryCount, outPointer);

	const auto p = getObject(directory, KernelObjectType::Directory);
	if (p == nullptr) [[unlikely]] {
		Helpers::panic("Called ReadDirectory on non-existent directory");
	}

	DirectorySession* session = p->getData<DirectorySession>();
	if (!session->pathOnDisk.has_value()) [[unlikely]] {
		Helpers::panic("Called ReadDirectory on directory that doesn't have a path on disk");
	}

	const std::filesystem::path dirPath = session->pathOnDisk.value();

	// Unsigned so that it compares cleanly against entryCount
	u32 count = 0;
	while (count < entryCount && session->currentEntry < session->entries.size()) {
		const auto& entry = session->entries[session->currentEntry];
		std::filesystem::path path = entry.path;
		std::filesystem::path filename = path.filename();

		// The long (UTF-16) name is the entry's path relative to the directory being read
		std::filesystem::path relative = path.lexically_relative(dirPath);
		std::u16string nameU16 = relative.u16string();
		// If the first character is a dot then this is a hidden file/folder
		const bool isHidden = !nameU16.empty() && nameU16[0] == u'.';

		const u32 entryPointer = outPointer + (count * 0x228);  // 0x228 is the size of a single entry
		u32 utfPointer = entryPointer;
		u32 namePointer = entryPointer + 0x20C;
		u32 extensionPointer = entryPointer + 0x216;
		const u32 attributePointer = entryPointer + 0x21C;
		// TODO: The field at entryPointer + 0x220 (the entry size) is not populated yet

		std::string filenameString = filename.string();
		auto [shortFilename, shortExtension] = convertTo83(filenameString);

		// NOTE(review): the UTF-16 name is not length-checked against the 0x20C bytes that precede the 8.3 name
		for (auto c : nameU16) {
			mem.write16(utfPointer, u16(c));
			utfPointer += sizeof(u16);
		}
		mem.write16(utfPointer, 0);  // Null terminate the UTF16 name

		// Write 8.3 filename-extension
		for (auto c : shortFilename) {
			mem.write8(namePointer, u8(c));
			namePointer += sizeof(u8);
		}

		for (auto c : shortExtension) {
			mem.write8(extensionPointer, u8(c));
			extensionPointer += sizeof(u8);
		}

		// This byte belongs to the current entry, so it must be addressed relative to entryPointer
		// (addressing it from outPointer would only ever touch the first entry)
		mem.write8(entryPointer + 0x21A, 1);                          // Always 1 according to 3DBrew
		mem.write8(attributePointer, entry.isDirectory ? 1 : 0);      // "Is directory" attribute
		mem.write8(attributePointer + 1, isHidden ? 1 : 0);           // "Is hidden" attribute
		mem.write8(attributePointer + 2, entry.isDirectory ? 0 : 1);  // "Is archive" attribute
		mem.write8(attributePointer + 3, 0);                          // "Is read-only" attribute

		count++;                  // Increment number of read directories
		session->currentEntry++;  // Increment index of the entry currently being read
	}

	mem.write32(messagePointer, IPC::responseHeader(0x801, 2, 2));
	mem.write32(messagePointer + 4, Result::Success);
	mem.write32(messagePointer + 8, count);  // Number of entries actually written
}
|
||||
|
|
|
@ -95,14 +95,29 @@ KernelObject* Kernel::getProcessFromPID(Handle handle) {
|
|||
}
|
||||
|
||||
// Frees the data attached to a kernel object, if any.
// The data is deleted through a pointer of the concrete type matching object.type so that the
// right type is destroyed; a single untyped `delete object.data` must not be issued in addition,
// or the data would be deleted twice.
void Kernel::deleteObjectData(KernelObject& object) {
	if (object.data == nullptr) {
		return;
	}

	switch (object.type) {
		case KernelObjectType::AddressArbiter: delete object.getData<AddressArbiter>(); return;
		case KernelObjectType::Archive: delete object.getData<ArchiveSession>(); return;
		case KernelObjectType::Directory: delete object.getData<DirectorySession>(); return;
		case KernelObjectType::Event: delete object.getData<Event>(); return;
		case KernelObjectType::File: delete object.getData<FileSession>(); return;
		case KernelObjectType::MemoryBlock: delete object.getData<MemoryBlock>(); return;
		case KernelObjectType::Port: delete object.getData<Port>(); return;
		case KernelObjectType::Process: delete object.getData<Process>(); return;
		// Resource limit and thread objects do not allocate heap data, so we don't delete anything
		case KernelObjectType::ResourceLimit: return;
		case KernelObjectType::Session: delete object.getData<Session>(); return;
		case KernelObjectType::Mutex: delete object.getData<Mutex>(); return;
		case KernelObjectType::Semaphore: delete object.getData<Semaphore>(); return;
		case KernelObjectType::Thread: return;
		case KernelObjectType::Dummy: return;
		default: [[unlikely]] Helpers::warn("unknown object type"); return;
	}
}
|
||||
|
||||
void Kernel::reset() {
|
||||
|
@ -240,4 +255,4 @@ std::string Kernel::getProcessName(u32 pid) {
|
|||
} else {
|
||||
Helpers::panic("Attempted to name non-current process");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ static constexpr u32 signExtend3To32(u32 val) {
|
|||
return (u32)(s32(val) << 29 >> 29);
|
||||
}
|
||||
|
||||
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* data) {
|
||||
u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, std::span<const u8> data) {
|
||||
// Pixel offset of the 8x8 tile based on u, v and the width of the texture
|
||||
u32 offs = ((u & ~7) * 8) + ((v & ~7) * width);
|
||||
if (!hasAlpha)
|
||||
|
@ -30,8 +30,7 @@ u32 Texture::getTexelETC(bool hasAlpha, u32 u, u32 v, u32 width, const void* dat
|
|||
offs += subTileSize * subTileIndex;
|
||||
|
||||
u32 alpha;
|
||||
const u8* tmp = static_cast<const u8*>(data) + offs; // Pointer to colour and alpha data as u8*
|
||||
const u64* ptr = reinterpret_cast<const u64*>(tmp); // Cast to u64*
|
||||
const u64* ptr = reinterpret_cast<const u64*>(data.data() + offs); // Cast to u64*
|
||||
|
||||
if (hasAlpha) {
|
||||
// First 64 bits of the 4x4 subtile are alpha data
|
||||
|
@ -118,4 +117,4 @@ u32 Texture::decodeETC(u32 alpha, u32 u, u32 v, u64 colourData) {
|
|||
b = std::clamp(b + modifier, 0, 255);
|
||||
|
||||
return (alpha << 24) | (u32(b) << 16) | (u32(g) << 8) | u32(r);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include "gl_state.hpp"
|
||||
#include "renderer_gl/gl_state.hpp"
|
||||
|
||||
void GLStateManager::resetBlend() {
|
||||
blendEnabled = false;
|
|
@ -1,582 +1,22 @@
|
|||
#include "renderer_gl/renderer_gl.hpp"
|
||||
|
||||
#include <stb_image_write.h>
|
||||
|
||||
#include <cmrc/cmrc.hpp>
|
||||
|
||||
#include "PICA/float_types.hpp"
|
||||
#include "PICA/gpu.hpp"
|
||||
#include "PICA/regs.hpp"
|
||||
|
||||
CMRC_DECLARE(RendererGL);
|
||||
|
||||
using namespace Floats;
|
||||
using namespace Helpers;
|
||||
using namespace PICA;
|
||||
|
||||
const char* vertexShader = R"(
|
||||
#version 410 core
|
||||
|
||||
layout (location = 0) in vec4 a_coords;
|
||||
layout (location = 1) in vec4 a_quaternion;
|
||||
layout (location = 2) in vec4 a_vertexColour;
|
||||
layout (location = 3) in vec2 a_texcoord0;
|
||||
layout (location = 4) in vec2 a_texcoord1;
|
||||
layout (location = 5) in float a_texcoord0_w;
|
||||
layout (location = 6) in vec3 a_view;
|
||||
layout (location = 7) in vec2 a_texcoord2;
|
||||
RendererGL::~RendererGL() {}
|
||||
|
||||
out vec3 v_normal;
|
||||
out vec3 v_tangent;
|
||||
out vec3 v_bitangent;
|
||||
out vec4 v_colour;
|
||||
out vec3 v_texcoord0;
|
||||
out vec2 v_texcoord1;
|
||||
out vec3 v_view;
|
||||
out vec2 v_texcoord2;
|
||||
flat out vec4 v_textureEnvColor[6];
|
||||
flat out vec4 v_textureEnvBufferColor;
|
||||
|
||||
out float gl_ClipDistance[2];
|
||||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvColor[6];
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
// Helper so that the implementation of u_pica_regs can be changed later
|
||||
uint readPicaReg(uint reg_addr){
|
||||
return u_picaRegs[reg_addr - 0x48];
|
||||
}
|
||||
|
||||
vec4 abgr8888ToVec4(uint abgr) {
|
||||
const float scale = 1.0 / 255.0;
|
||||
|
||||
return scale * vec4(
|
||||
float(abgr & 0xffu),
|
||||
float((abgr >> 8) & 0xffu),
|
||||
float((abgr >> 16) & 0xffu),
|
||||
float(abgr >> 24)
|
||||
);
|
||||
}
|
||||
|
||||
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){
|
||||
vec3 u = q.xyz;
|
||||
float s = q.w;
|
||||
return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v + 2.0 * s * cross(u, v);
|
||||
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
float decodeFP(uint hex, uint E, uint M){
|
||||
uint width = M + E + 1u;
|
||||
uint bias = 128u - (1u << (E - 1u));
|
||||
uint exponent = (hex >> M) & ((1u << E) - 1u);
|
||||
uint mantissa = hex & ((1u << M) - 1u);
|
||||
uint sign = (hex >> (E + M)) << 31u;
|
||||
|
||||
if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
|
||||
if (exponent == (1u << E) - 1u) exponent = 255u;
|
||||
else exponent += bias;
|
||||
hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
|
||||
} else {
|
||||
hex = sign;
|
||||
}
|
||||
|
||||
return uintBitsToFloat(hex);
|
||||
}
|
||||
|
||||
void main() {
|
||||
gl_Position = a_coords;
|
||||
v_colour = a_vertexColour;
|
||||
|
||||
// Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA
|
||||
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
|
||||
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
|
||||
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
|
||||
v_view = a_view;
|
||||
|
||||
v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
|
||||
v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
|
||||
v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
|
||||
}
|
||||
|
||||
v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD));
|
||||
|
||||
// Parse clipping plane registers
|
||||
// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0
|
||||
// With n = (A, B, C) being the normal vector and D being the origin point distance
|
||||
// Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1]
|
||||
vec4 clipData = vec4(
|
||||
decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16),
|
||||
decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16),
|
||||
decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16),
|
||||
decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16)
|
||||
);
|
||||
|
||||
// There's also another, always-on clipping plane based on vertex z
|
||||
gl_ClipDistance[0] = -a_coords.z;
|
||||
gl_ClipDistance[1] = dot(clipData, a_coords);
|
||||
}
|
||||
)";
|
||||
|
||||
const char* fragmentShader = R"(
|
||||
#version 410 core
|
||||
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
in vec3 v_bitangent;
|
||||
in vec4 v_colour;
|
||||
in vec3 v_texcoord0;
|
||||
in vec2 v_texcoord1;
|
||||
in vec3 v_view;
|
||||
in vec2 v_texcoord2;
|
||||
flat in vec4 v_textureEnvColor[6];
|
||||
flat in vec4 v_textureEnvBufferColor;
|
||||
|
||||
out vec4 fragColour;
|
||||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvSource[6];
|
||||
uniform uint u_textureEnvOperand[6];
|
||||
uniform uint u_textureEnvCombiner[6];
|
||||
uniform uint u_textureEnvScale[6];
|
||||
|
||||
// Depth control uniforms
|
||||
uniform float u_depthScale;
|
||||
uniform float u_depthOffset;
|
||||
uniform bool u_depthmapEnable;
|
||||
|
||||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
uniform sampler1DArray u_tex_lighting_lut;
|
||||
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
// Helper so that the implementation of u_pica_regs can be changed later
|
||||
uint readPicaReg(uint reg_addr){
|
||||
return u_picaRegs[reg_addr - 0x48];
|
||||
}
|
||||
|
||||
vec4 tevSources[16];
|
||||
vec4 tevNextPreviousBuffer;
|
||||
bool tevUnimplementedSourceFlag = false;
|
||||
|
||||
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
|
||||
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
|
||||
|
||||
vec4 tevFetchSource(uint src_id) {
|
||||
if (src_id >= 6u && src_id < 13u) {
|
||||
tevUnimplementedSourceFlag = true;
|
||||
}
|
||||
|
||||
return tevSources[src_id];
|
||||
}
|
||||
|
||||
vec4 tevGetColorAndAlphaSource(int tev_id, int src_id) {
|
||||
vec4 result;
|
||||
|
||||
vec4 colorSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4)) & 15u);
|
||||
vec4 alphaSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u);
|
||||
|
||||
uint colorOperand = (u_textureEnvOperand[tev_id] >> (src_id * 4)) & 15u;
|
||||
uint alphaOperand = (u_textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u;
|
||||
|
||||
// TODO: figure out what the undocumented values do
|
||||
switch (colorOperand) {
|
||||
case 0u: result.rgb = colorSource.rgb; break; // Source color
|
||||
case 1u: result.rgb = 1.0 - colorSource.rgb; break; // One minus source color
|
||||
case 2u: result.rgb = vec3(colorSource.a); break; // Source alpha
|
||||
case 3u: result.rgb = vec3(1.0 - colorSource.a); break; // One minus source alpha
|
||||
case 4u: result.rgb = vec3(colorSource.r); break; // Source red
|
||||
case 5u: result.rgb = vec3(1.0 - colorSource.r); break; // One minus source red
|
||||
case 8u: result.rgb = vec3(colorSource.g); break; // Source green
|
||||
case 9u: result.rgb = vec3(1.0 - colorSource.g); break; // One minus source green
|
||||
case 12u: result.rgb = vec3(colorSource.b); break; // Source blue
|
||||
case 13u: result.rgb = vec3(1.0 - colorSource.b); break; // One minus source blue
|
||||
default: break;
|
||||
}
|
||||
|
||||
// TODO: figure out what the undocumented values do
|
||||
switch (alphaOperand) {
|
||||
case 0u: result.a = alphaSource.a; break; // Source alpha
|
||||
case 1u: result.a = 1.0 - alphaSource.a; break; // One minus source alpha
|
||||
case 2u: result.a = alphaSource.r; break; // Source red
|
||||
case 3u: result.a = 1.0 - alphaSource.r; break; // One minus source red
|
||||
case 4u: result.a = alphaSource.g; break; // Source green
|
||||
case 5u: result.a = 1.0 - alphaSource.g; break; // One minus source green
|
||||
case 6u: result.a = alphaSource.b; break; // Source blue
|
||||
case 7u: result.a = 1.0 - alphaSource.b; break; // One minus source blue
|
||||
default: break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
vec4 tevCalculateCombiner(int tev_id) {
|
||||
vec4 source0 = tevGetColorAndAlphaSource(tev_id, 0);
|
||||
vec4 source1 = tevGetColorAndAlphaSource(tev_id, 1);
|
||||
vec4 source2 = tevGetColorAndAlphaSource(tev_id, 2);
|
||||
|
||||
uint colorCombine = u_textureEnvCombiner[tev_id] & 15u;
|
||||
uint alphaCombine = (u_textureEnvCombiner[tev_id] >> 16) & 15u;
|
||||
|
||||
vec4 result = vec4(1.0);
|
||||
|
||||
// TODO: figure out what the undocumented values do
|
||||
switch (colorCombine) {
|
||||
case 0u: result.rgb = source0.rgb; break; // Replace
|
||||
case 1u: result.rgb = source0.rgb * source1.rgb; break; // Modulate
|
||||
case 2u: result.rgb = min(vec3(1.0), source0.rgb + source1.rgb); break; // Add
|
||||
case 3u: result.rgb = clamp(source0.rgb + source1.rgb - 0.5, 0.0, 1.0); break; // Add signed
|
||||
case 4u: result.rgb = mix(source1.rgb, source0.rgb, source2.rgb); break; // Interpolate
|
||||
case 5u: result.rgb = max(source0.rgb - source1.rgb, 0.0); break; // Subtract
|
||||
case 6u: result.rgb = vec3(4.0 * dot(source0.rgb - 0.5 , source1.rgb - 0.5)); break; // Dot3 RGB
|
||||
case 7u: result = vec4(4.0 * dot(source0.rgb - 0.5 , source1.rgb - 0.5)); break; // Dot3 RGBA
|
||||
case 8u: result.rgb = min(source0.rgb * source1.rgb + source2.rgb, 1.0); break; // Multiply then add
|
||||
case 9u: result.rgb = min((source0.rgb + source1.rgb) * source2.rgb, 1.0); break; // Add then multiply
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (colorCombine != 7u) { // The color combiner also writes the alpha channel in the "Dot3 RGBA" mode.
|
||||
// TODO: figure out what the undocumented values do
|
||||
// TODO: test if the alpha combiner supports all the same modes as the color combiner.
|
||||
switch (alphaCombine) {
|
||||
case 0u: result.a = source0.a; break; // Replace
|
||||
case 1u: result.a = source0.a * source1.a; break; // Modulate
|
||||
case 2u: result.a = min(1.0, source0.a + source1.a); break; // Add
|
||||
case 3u: result.a = clamp(source0.a + source1.a - 0.5, 0.0, 1.0); break; // Add signed
|
||||
case 4u: result.a = mix(source1.a, source0.a, source2.a); break; // Interpolate
|
||||
case 5u: result.a = max(0.0, source0.a - source1.a); break; // Subtract
|
||||
case 8u: result.a = min(1.0, source0.a * source1.a + source2.a); break; // Multiply then add
|
||||
case 9u: result.a = min(1.0, (source0.a + source1.a) * source2.a); break; // Add then multiply
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
result.rgb *= float(1 << (u_textureEnvScale[tev_id] & 3u));
|
||||
result.a *= float(1 << ((u_textureEnvScale[tev_id] >> 16) & 3u));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#define D0_LUT 0u
|
||||
#define D1_LUT 1u
|
||||
#define SP_LUT 2u
|
||||
#define FR_LUT 3u
|
||||
#define RB_LUT 4u
|
||||
#define RG_LUT 5u
|
||||
#define RR_LUT 6u
|
||||
|
||||
float lutLookup(uint lut, uint light, float value){
|
||||
if (lut >= FR_LUT && lut <= RR_LUT)
|
||||
lut -= 1;
|
||||
if (lut==SP_LUT)
|
||||
lut = light + 8;
|
||||
return texture(u_tex_lighting_lut, vec2(value, lut)).r;
|
||||
}
|
||||
|
||||
vec3 regToColor(uint reg) {
|
||||
// Normalization scale to convert from [0...255] to [0.0...1.0]
|
||||
const float scale = 1.0 / 255.0;
|
||||
|
||||
return scale * vec3(
|
||||
float(bitfieldExtract(reg, 20, 8)),
|
||||
float(bitfieldExtract(reg, 10, 8)),
|
||||
float(bitfieldExtract(reg, 00, 8))
|
||||
);
|
||||
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
float decodeFP(uint hex, uint E, uint M){
|
||||
uint width = M + E + 1u;
|
||||
uint bias = 128u - (1u << (E - 1u));
|
||||
uint exponent = (hex >> M) & ((1u << E) - 1u);
|
||||
uint mantissa = hex & ((1u << M) - 1u);
|
||||
uint sign = (hex >> (E + M)) << 31u;
|
||||
|
||||
if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
|
||||
if (exponent == (1u << E) - 1u) exponent = 255u;
|
||||
else exponent += bias;
|
||||
hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
|
||||
} else {
|
||||
hex = sign;
|
||||
}
|
||||
|
||||
return uintBitsToFloat(hex);
|
||||
}
|
||||
|
||||
// Implements the following algorthm: https://mathb.in/26766
|
||||
void calcLighting(out vec4 primary_color, out vec4 secondary_color){
|
||||
// Quaternions describe a transformation from surface-local space to eye space.
|
||||
// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
|
||||
// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
|
||||
vec3 normal = normalize(v_normal );
|
||||
vec3 tangent = normalize(v_tangent );
|
||||
vec3 bitangent = normalize(v_bitangent);
|
||||
vec3 view = normalize(v_view);
|
||||
|
||||
uint GPUREG_LIGHTING_ENABLE = readPicaReg(0x008F);
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){
|
||||
primary_color = secondary_color = vec4(1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0);
|
||||
uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1;
|
||||
uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9);
|
||||
|
||||
primary_color = vec4(vec3(0.0),1.0);
|
||||
secondary_color = vec4(vec3(0.0),1.0);
|
||||
|
||||
primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
|
||||
|
||||
uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1);
|
||||
uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3);
|
||||
uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4);
|
||||
uint GPUREG_LIGHTING_LUTINPUT_SCALE = readPicaReg(0x01D2);
|
||||
float d[7];
|
||||
|
||||
bool error_unimpl = false;
|
||||
|
||||
for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++) {
|
||||
uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3);
|
||||
|
||||
uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id);
|
||||
|
||||
vec3 light_vector = normalize(vec3(
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
|
||||
));
|
||||
|
||||
// Positional Light
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0)
|
||||
error_unimpl = true;
|
||||
|
||||
vec3 half_vector = normalize(normalize(light_vector) + view);
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){
|
||||
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
|
||||
float scale = float(1u << scale_id);
|
||||
if (scale_id >= 6u)
|
||||
scale/=256.0;
|
||||
|
||||
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
|
||||
if (input_id == 0u) d[c] = dot(normal,half_vector);
|
||||
else if (input_id == 1u) d[c] = dot(view,half_vector);
|
||||
else if (input_id == 2u) d[c] = dot(normal,view);
|
||||
else if (input_id == 3u) d[c] = dot(light_vector,normal);
|
||||
else if (input_id == 4u){
|
||||
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id);
|
||||
uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id);
|
||||
vec3 spot_light_vector = normalize(vec3(
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11),
|
||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11)
|
||||
));
|
||||
d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
|
||||
} else if (input_id == 5u) {
|
||||
d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
|
||||
error_unimpl = true;
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
|
||||
d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale;
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u)
|
||||
d[c] = abs(d[c]);
|
||||
} else {
|
||||
d[c] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
|
||||
if (lookup_config == 0) {
|
||||
d[D1_LUT] = 0.0;
|
||||
d[FR_LUT] = 0.0;
|
||||
d[RG_LUT]= d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 1) {
|
||||
d[D0_LUT] = 0.0;
|
||||
d[D1_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 2) {
|
||||
d[FR_LUT] = 0.0;
|
||||
d[SP_LUT] = 0.0;
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
} else if (lookup_config == 3) {
|
||||
d[SP_LUT] = 0.0;
|
||||
d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0;
|
||||
} else if (lookup_config == 4) {
|
||||
d[FR_LUT] = 0.0;
|
||||
} else if (lookup_config == 5) {
|
||||
d[D1_LUT] = 0.0;
|
||||
} else if (lookup_config == 6) {
|
||||
d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
|
||||
}
|
||||
|
||||
float distance_factor = 1.0; // a
|
||||
float indirect_factor = 1.0; // fi
|
||||
float shadow_factor = 1.0; // o
|
||||
|
||||
float NdotL = dot(normal, light_vector); //Li dot N
|
||||
|
||||
// Two sided diffuse
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL);
|
||||
else NdotL = abs(NdotL);
|
||||
|
||||
float light_factor = distance_factor*d[SP_LUT]*indirect_factor*shadow_factor;
|
||||
|
||||
primary_color.rgb += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL);
|
||||
secondary_color.rgb += light_factor * (
|
||||
regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
|
||||
regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])
|
||||
);
|
||||
}
|
||||
uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
|
||||
uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
|
||||
|
||||
if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
|
||||
if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
|
||||
|
||||
if (error_unimpl) {
|
||||
secondary_color = primary_color = vec4(1.0,0.,1.0,1.0);
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
// TODO: what do invalid sources and disabled textures read as?
|
||||
// And what does the "previous combiner" source read initially?
|
||||
tevSources[0] = v_colour; // Primary/vertex color
|
||||
calcLighting(tevSources[1],tevSources[2]);
|
||||
|
||||
uint textureConfig = readPicaReg(0x80);
|
||||
vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2;
|
||||
|
||||
if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy);
|
||||
if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1);
|
||||
if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV);
|
||||
tevSources[13] = vec4(0.0); // Previous buffer
|
||||
tevSources[15] = vec4(0.0); // Previous combiner
|
||||
|
||||
tevNextPreviousBuffer = v_textureEnvBufferColor;
|
||||
uint textureEnvUpdateBuffer = readPicaReg(0xE0);
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
tevSources[14] = v_textureEnvColor[i]; // Constant color
|
||||
tevSources[15] = tevCalculateCombiner(i);
|
||||
tevSources[13] = tevNextPreviousBuffer;
|
||||
|
||||
if (i < 4) {
|
||||
if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) {
|
||||
tevNextPreviousBuffer.rgb = tevSources[15].rgb;
|
||||
}
|
||||
|
||||
if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) {
|
||||
tevNextPreviousBuffer.a = tevSources[15].a;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragColour = tevSources[15];
|
||||
|
||||
if (tevUnimplementedSourceFlag) {
|
||||
// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
|
||||
}
|
||||
// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
|
||||
|
||||
|
||||
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
|
||||
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
|
||||
float z_over_w = gl_FragCoord.z * 2.0f - 1.0f;
|
||||
float depth = z_over_w * u_depthScale + u_depthOffset;
|
||||
|
||||
if (!u_depthmapEnable) // Divide z by w if depthmap enable == 0 (ie using W-buffering)
|
||||
depth /= gl_FragCoord.w;
|
||||
|
||||
// Write final fragment depth
|
||||
gl_FragDepth = depth;
|
||||
|
||||
// Perform alpha test
|
||||
uint alphaControl = readPicaReg(0x104);
|
||||
if ((alphaControl & 1u) != 0u) { // Check if alpha test is on
|
||||
uint func = (alphaControl >> 4u) & 7u;
|
||||
float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
|
||||
float alpha = fragColour.a;
|
||||
|
||||
switch (func) {
|
||||
case 0: discard; // Never pass alpha test
|
||||
case 1: break; // Always pass alpha test
|
||||
case 2: // Pass if equal
|
||||
if (alpha != reference)
|
||||
discard;
|
||||
break;
|
||||
case 3: // Pass if not equal
|
||||
if (alpha == reference)
|
||||
discard;
|
||||
break;
|
||||
case 4: // Pass if less than
|
||||
if (alpha >= reference)
|
||||
discard;
|
||||
break;
|
||||
case 5: // Pass if less than or equal
|
||||
if (alpha > reference)
|
||||
discard;
|
||||
break;
|
||||
case 6: // Pass if greater than
|
||||
if (alpha <= reference)
|
||||
discard;
|
||||
break;
|
||||
case 7: // Pass if greater than or equal
|
||||
if (alpha < reference)
|
||||
discard;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
)";
|
||||
|
||||
// GLSL 4.10 vertex shader source for the display pass.
// It declares no vertex attributes: both the clip-space position and the UV
// coordinate are looked up from constant 4-entry tables indexed by gl_VertexID.
// The UV table is ordered differently from the position table to account for
// the rotated framebuffer described in the in-shader comment.
const char* displayVertexShader = R"(
|
||||
#version 410 core
|
||||
out vec2 UV;
|
||||
|
||||
void main() {
|
||||
const vec4 positions[4] = vec4[](
|
||||
vec4(-1.0, 1.0, 1.0, 1.0), // Top-left
|
||||
vec4(1.0, 1.0, 1.0, 1.0), // Top-right
|
||||
vec4(-1.0, -1.0, 1.0, 1.0), // Bottom-left
|
||||
vec4(1.0, -1.0, 1.0, 1.0) // Bottom-right
|
||||
);
|
||||
|
||||
// The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise
|
||||
// So we adjust our texcoords accordingly
|
||||
const vec2 texcoords[4] = vec2[](
|
||||
vec2(1.0, 1.0), // Top-right
|
||||
vec2(1.0, 0.0), // Bottom-right
|
||||
vec2(0.0, 1.0), // Top-left
|
||||
vec2(0.0, 0.0) // Bottom-left
|
||||
);
|
||||
|
||||
gl_Position = positions[gl_VertexID];
|
||||
UV = texcoords[gl_VertexID];
|
||||
}
|
||||
)";
|
||||
|
||||
// GLSL 4.10 fragment shader source for the display pass.
// Samples the u_texture sampler at the interpolated UV coordinate and writes
// the sampled colour to FragColor without further processing.
const char* displayFragmentShader = R"(
|
||||
#version 410 core
|
||||
in vec2 UV;
|
||||
out vec4 FragColor;
|
||||
|
||||
uniform sampler2D u_texture;
|
||||
void main() {
|
||||
FragColor = texture(u_texture, UV);
|
||||
}
|
||||
)";
|
||||
|
||||
void Renderer::reset() {
|
||||
void RendererGL::reset() {
|
||||
depthBufferCache.reset();
|
||||
colourBufferCache.reset();
|
||||
textureCache.reset();
|
||||
|
@ -592,10 +32,10 @@ void Renderer::reset() {
|
|||
const auto oldProgram = OpenGL::getProgram();
|
||||
|
||||
gl.useProgram(triangleProgram);
|
||||
|
||||
oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use
|
||||
oldDepthOffset = 0.0; // Default depth offset to 0
|
||||
oldDepthmapEnable = false; // Enable w buffering
|
||||
|
||||
oldDepthScale = -1.0; // Default depth scale to -1.0, which is what games typically use
|
||||
oldDepthOffset = 0.0; // Default depth offset to 0
|
||||
oldDepthmapEnable = false; // Enable w buffering
|
||||
|
||||
glUniform1f(depthScaleLoc, oldDepthScale);
|
||||
glUniform1f(depthOffsetLoc, oldDepthOffset);
|
||||
|
@ -605,10 +45,17 @@ void Renderer::reset() {
|
|||
}
|
||||
}
|
||||
|
||||
void Renderer::initGraphicsContext() {
|
||||
OpenGL::Shader vert(vertexShader, OpenGL::Vertex);
|
||||
OpenGL::Shader frag(fragmentShader, OpenGL::Fragment);
|
||||
triangleProgram.create({ vert, frag });
|
||||
void RendererGL::initGraphicsContext() {
|
||||
gl.reset();
|
||||
|
||||
auto gl_resources = cmrc::RendererGL::get_filesystem();
|
||||
|
||||
auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert");
|
||||
auto fragmentShaderSource = gl_resources.open("opengl_fragment_shader.frag");
|
||||
|
||||
OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex);
|
||||
OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment);
|
||||
triangleProgram.create({vert, frag});
|
||||
gl.useProgram(triangleProgram);
|
||||
|
||||
textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
|
||||
|
@ -628,12 +75,15 @@ void Renderer::initGraphicsContext() {
|
|||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
|
||||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
|
||||
|
||||
OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
|
||||
OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
|
||||
displayProgram.create({ vertDisplay, fragDisplay });
|
||||
auto displayVertexShaderSource = gl_resources.open("opengl_display.vert");
|
||||
auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag");
|
||||
|
||||
OpenGL::Shader vertDisplay({displayVertexShaderSource.begin(), displayVertexShaderSource.size()}, OpenGL::Vertex);
|
||||
OpenGL::Shader fragDisplay({displayFragmentShaderSource.begin(), displayFragmentShaderSource.size()}, OpenGL::Fragment);
|
||||
displayProgram.create({vertDisplay, fragDisplay});
|
||||
|
||||
gl.useProgram(displayProgram);
|
||||
glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object
|
||||
glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object
|
||||
|
||||
vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW);
|
||||
gl.bindVBO(vbo);
|
||||
|
@ -669,10 +119,10 @@ void Renderer::initGraphicsContext() {
|
|||
dummyVAO.create();
|
||||
|
||||
// Create texture and framebuffer for the 3DS screen
|
||||
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
|
||||
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
|
||||
|
||||
glGenTextures(1,&lightLUTTextureArray);
|
||||
const u32 screenTextureWidth = 400; // Top screen is 400 pixels wide, bottom is 320
|
||||
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
|
||||
|
||||
glGenTextures(1, &lightLUTTextureArray);
|
||||
|
||||
auto prevTexture = OpenGL::getTex2D();
|
||||
screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
|
||||
|
@ -684,8 +134,7 @@ void Renderer::initGraphicsContext() {
|
|||
screenFramebuffer.createWithDrawTexture(screenTexture);
|
||||
screenFramebuffer.bind(OpenGL::DrawAndReadFramebuffer);
|
||||
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
|
||||
Helpers::panic("Incomplete framebuffer");
|
||||
if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) Helpers::panic("Incomplete framebuffer");
|
||||
|
||||
// TODO: This should not clear the framebuffer contents. It should load them from VRAM.
|
||||
GLint oldViewport[4];
|
||||
|
@ -699,19 +148,32 @@ void Renderer::initGraphicsContext() {
|
|||
}
|
||||
|
||||
// Set up the OpenGL blending context to match the emulated PICA
|
||||
void Renderer::setupBlending() {
|
||||
void RendererGL::setupBlending() {
|
||||
const bool blendingEnabled = (regs[PICA::InternalRegs::ColourOperation] & (1 << 8)) != 0;
|
||||
|
||||
|
||||
// Map of PICA blending equations to OpenGL blending equations. The unused blending equations are equivalent to equation 0 (add)
|
||||
static constexpr std::array<GLenum, 8> blendingEquations = {
|
||||
GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, GL_FUNC_ADD, GL_FUNC_ADD
|
||||
GL_FUNC_ADD, GL_FUNC_SUBTRACT, GL_FUNC_REVERSE_SUBTRACT, GL_MIN, GL_MAX, GL_FUNC_ADD, GL_FUNC_ADD, GL_FUNC_ADD,
|
||||
};
|
||||
|
||||
|
||||
// Map of PICA blending funcs to OpenGL blending funcs. Func = 15 is undocumented and stubbed to GL_ONE for now
|
||||
static constexpr std::array<GLenum, 16> blendingFuncs = {
|
||||
GL_ZERO, GL_ONE, GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, GL_DST_COLOR, GL_ONE_MINUS_DST_COLOR, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA, GL_ONE_MINUS_DST_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_CONSTANT_COLOR, GL_CONSTANT_ALPHA, GL_ONE_MINUS_CONSTANT_ALPHA,
|
||||
GL_SRC_ALPHA_SATURATE, GL_ONE
|
||||
GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_SRC_COLOR,
|
||||
GL_ONE_MINUS_SRC_COLOR,
|
||||
GL_DST_COLOR,
|
||||
GL_ONE_MINUS_DST_COLOR,
|
||||
GL_SRC_ALPHA,
|
||||
GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA,
|
||||
GL_CONSTANT_COLOR,
|
||||
GL_ONE_MINUS_CONSTANT_COLOR,
|
||||
GL_CONSTANT_ALPHA,
|
||||
GL_ONE_MINUS_CONSTANT_ALPHA,
|
||||
GL_SRC_ALPHA_SATURATE,
|
||||
GL_ONE,
|
||||
};
|
||||
|
||||
if (!blendingEnabled) {
|
||||
|
@ -743,13 +205,12 @@ void Renderer::setupBlending() {
|
|||
}
|
||||
}
|
||||
|
||||
void Renderer::setupTextureEnvState() {
|
||||
void RendererGL::setupTextureEnvState() {
|
||||
// TODO: Only update uniforms when the TEV config changed. Use an UBO potentially.
|
||||
|
||||
static constexpr std::array<u32, 6> ioBases = {
|
||||
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source,
|
||||
PICA::InternalRegs::TexEnv2Source, PICA::InternalRegs::TexEnv3Source,
|
||||
PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source
|
||||
PICA::InternalRegs::TexEnv0Source, PICA::InternalRegs::TexEnv1Source, PICA::InternalRegs::TexEnv2Source,
|
||||
PICA::InternalRegs::TexEnv3Source, PICA::InternalRegs::TexEnv4Source, PICA::InternalRegs::TexEnv5Source,
|
||||
};
|
||||
|
||||
u32 textureEnvSourceRegs[6];
|
||||
|
@ -775,9 +236,11 @@ void Renderer::setupTextureEnvState() {
|
|||
glUniform1uiv(textureEnvScaleLoc, 6, textureEnvScaleRegs);
|
||||
}
|
||||
|
||||
void Renderer::bindTexturesToSlots() {
|
||||
void RendererGL::bindTexturesToSlots() {
|
||||
static constexpr std::array<u32, 3> ioBases = {
|
||||
PICA::InternalRegs::Tex0BorderColor, PICA::InternalRegs::Tex1BorderColor, PICA::InternalRegs::Tex2BorderColor
|
||||
PICA::InternalRegs::Tex0BorderColor,
|
||||
PICA::InternalRegs::Tex1BorderColor,
|
||||
PICA::InternalRegs::Tex2BorderColor,
|
||||
};
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
@ -805,13 +268,13 @@ void Renderer::bindTexturesToSlots() {
|
|||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void Renderer::updateLightingLUT() {
|
||||
void RendererGL::updateLightingLUT() {
|
||||
gpu.lightingLUTDirty = false;
|
||||
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
|
||||
std::array<u16, GPU::LightingLutSize> u16_lightinglut;
|
||||
|
||||
for (int i = 0; i < gpu.lightingLUT.size(); i++) {
|
||||
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
u16_lightinglut[i] = value * 65535 / 4095;
|
||||
uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1);
|
||||
u16_lightinglut[i] = value * 65535 / 4095;
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 3);
|
||||
|
@ -824,19 +287,22 @@ void Renderer::updateLightingLUT() {
|
|||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
void Renderer::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
|
||||
void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> vertices) {
|
||||
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
|
||||
static constexpr std::array<OpenGL::Primitives, 4> primTypes = {
|
||||
OpenGL::Triangle, OpenGL::TriangleStrip, OpenGL::TriangleFan, OpenGL::Triangle
|
||||
OpenGL::Triangle,
|
||||
OpenGL::TriangleStrip,
|
||||
OpenGL::TriangleFan,
|
||||
OpenGL::Triangle,
|
||||
};
|
||||
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
|
||||
|
||||
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
|
||||
gl.disableScissor();
|
||||
gl.bindVBO(vbo);
|
||||
gl.bindVAO(vao);
|
||||
gl.useProgram(triangleProgram);
|
||||
|
||||
OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled
|
||||
OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled
|
||||
if (regs[PICA::InternalRegs::ClipEnable] & 1) {
|
||||
OpenGL::enableClipPlane(1);
|
||||
}
|
||||
|
@ -852,9 +318,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const Vertex> ver
|
|||
const int colourMask = getBits<8, 4>(depthControl);
|
||||
gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8);
|
||||
|
||||
static constexpr std::array<GLenum, 8> depthModes = {
|
||||
GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL
|
||||
};
|
||||
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
|
||||
|
||||
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
|
||||
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
|
||||
|
@ -865,7 +329,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const Vertex> ver
|
|||
oldDepthScale = depthScale;
|
||||
glUniform1f(depthScaleLoc, depthScale);
|
||||
}
|
||||
|
||||
|
||||
if (oldDepthOffset != depthOffset) {
|
||||
oldDepthOffset = depthOffset;
|
||||
glUniform1f(depthOffsetLoc, depthOffset);
|
||||
|
@ -917,7 +381,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const Vertex> ver
|
|||
// Address constant for the top screen's buffer.
// NOTE(review): presumably a 3DS physical VRAM address - confirm against the memory map and the users of this constant.
constexpr u32 topScreenBuffer = 0x1f000000;
|
||||
// Address constant for the bottom screen's buffer.
// NOTE(review): presumably a 3DS physical VRAM address - confirm against the memory map and the users of this constant.
constexpr u32 bottomScreenBuffer = 0x1f05dc00;
|
||||
|
||||
void Renderer::display() {
|
||||
void RendererGL::display() {
|
||||
gl.disableScissor();
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
|
||||
|
@ -925,7 +389,7 @@ void Renderer::display() {
|
|||
glBlitFramebuffer(0, 0, 400, 480, 0, 0, 400, 480, GL_COLOR_BUFFER_BIT, GL_LINEAR);
|
||||
}
|
||||
|
||||
void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
|
||||
void RendererGL::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {
|
||||
return;
|
||||
log("GPU: Clear buffer\nStart: %08X End: %08X\nValue: %08X Control: %08X\n", startAddress, endAddress, value, control);
|
||||
|
||||
|
@ -947,10 +411,10 @@ void Renderer::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 cont
|
|||
OpenGL::clearColor();
|
||||
}
|
||||
|
||||
OpenGL::Framebuffer Renderer::getColourFBO() {
|
||||
//We construct a colour buffer object and see if our cache has any matching colour buffers in it
|
||||
// If not, we allocate a texture & FBO for our framebuffer and store it in the cache
|
||||
ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize.x(), fbSize.y());
|
||||
OpenGL::Framebuffer RendererGL::getColourFBO() {
|
||||
// We construct a colour buffer object and see if our cache has any matching colour buffers in it
|
||||
// If not, we allocate a texture & FBO for our framebuffer and store it in the cache
|
||||
ColourBuffer sampleBuffer(colourBufferLoc, colourBufferFormat, fbSize[0], fbSize[1]);
|
||||
auto buffer = colourBufferCache.find(sampleBuffer);
|
||||
|
||||
if (buffer.has_value()) {
|
||||
|
@ -960,9 +424,9 @@ OpenGL::Framebuffer Renderer::getColourFBO() {
|
|||
}
|
||||
}
|
||||
|
||||
void Renderer::bindDepthBuffer() {
|
||||
void RendererGL::bindDepthBuffer() {
|
||||
// Similar logic as the getColourFBO function
|
||||
DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize.x(), fbSize.y());
|
||||
DepthBuffer sampleBuffer(depthBufferLoc, depthBufferFormat, fbSize[0], fbSize[1]);
|
||||
auto buffer = depthBufferCache.find(sampleBuffer);
|
||||
GLuint tex;
|
||||
|
||||
|
@ -979,14 +443,14 @@ void Renderer::bindDepthBuffer() {
|
|||
glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0);
|
||||
}
|
||||
|
||||
OpenGL::Texture Renderer::getTexture(Texture& tex) {
|
||||
OpenGL::Texture RendererGL::getTexture(Texture& tex) {
|
||||
// Similar logic as the getColourFBO/bindDepthBuffer functions
|
||||
auto buffer = textureCache.find(tex);
|
||||
|
||||
if (buffer.has_value()) {
|
||||
return buffer.value().get().texture;
|
||||
} else {
|
||||
const void* textureData = gpu.getPointerPhys<void*>(tex.location); // Get pointer to the texture data in 3DS memory
|
||||
const auto textureData = std::span{gpu.getPointerPhys<u8>(tex.location), tex.sizeInBytes()}; // Get pointer to the texture data in 3DS memory
|
||||
Texture& newTex = textureCache.add(tex);
|
||||
newTex.decodeTexture(textureData);
|
||||
|
||||
|
@ -994,7 +458,7 @@ OpenGL::Texture Renderer::getTexture(Texture& tex) {
|
|||
}
|
||||
}
|
||||
|
||||
void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
void RendererGL::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {
|
||||
const u32 inputWidth = inputSize & 0xffff;
|
||||
const u32 inputGap = inputSize >> 16;
|
||||
|
||||
|
@ -1022,21 +486,21 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
|
|||
// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
|
||||
// We consider output gap == 320 to mean bottom, and anything else to mean top
|
||||
if (outputGap == 320) {
|
||||
OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport
|
||||
OpenGL::setViewport(40, 0, 320, 240); // Bottom screen viewport
|
||||
} else {
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
OpenGL::setViewport(0, 240, 400, 240); // Top screen viewport
|
||||
}
|
||||
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
OpenGL::draw(OpenGL::TriangleStrip, 4); // Actually draw our 3DS screen
|
||||
}
|
||||
|
||||
void Renderer::screenshot(const std::string& name) {
|
||||
void RendererGL::screenshot(const std::string& name) {
|
||||
constexpr uint width = 400;
|
||||
constexpr uint height = 2 * 240;
|
||||
|
||||
std::vector<uint8_t> pixels, flippedPixels;
|
||||
pixels.resize(width * height * 4);
|
||||
flippedPixels.resize(pixels.size());;
|
||||
pixels.resize(width * height * 4);
|
||||
flippedPixels.resize(pixels.size());
|
||||
|
||||
OpenGL::bindScreenFramebuffer();
|
||||
glReadPixels(0, 0, width, height, GL_BGRA, GL_UNSIGNED_BYTE, pixels.data());
|
||||
|
@ -1053,4 +517,4 @@ void Renderer::screenshot(const std::string& name) {
|
|||
}
|
||||
|
||||
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -112,12 +112,11 @@ u32 Texture::getSwizzledOffset_4bpp(u32 u, u32 v, u32 width) {
|
|||
// Get the texel at position (u, v)
|
||||
// fmt: format of the texture
|
||||
// data: texture data of the texture
|
||||
u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
||||
u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, std::span<const u8> data) {
|
||||
switch (fmt) {
|
||||
case PICA::TextureFmt::RGBA4: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
u16 texel = u16(ptr[offset]) | (u16(ptr[offset + 1]) << 8);
|
||||
u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
u8 alpha = Colour::convert4To8Bit(getBits<0, 4, u8>(texel));
|
||||
u8 b = Colour::convert4To8Bit(getBits<4, 4, u8>(texel));
|
||||
|
@ -128,9 +127,8 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
|
||||
case PICA::TextureFmt::RGBA5551: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
u16 texel = u16(ptr[offset]) | (u16(ptr[offset + 1]) << 8);
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
u8 alpha = getBit<0>(texel) ? 0xff : 0;
|
||||
u8 b = Colour::convert5To8Bit(getBits<1, 5, u8>(texel));
|
||||
|
@ -141,56 +139,47 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
|
||||
case PICA::TextureFmt::RGB565: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
u16 texel = u16(ptr[offset]) | (u16(ptr[offset + 1]) << 8);
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
const u16 texel = u16(data[offset]) | (u16(data[offset + 1]) << 8);
|
||||
|
||||
u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
|
||||
u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
|
||||
u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
|
||||
const u8 b = Colour::convert5To8Bit(getBits<0, 5, u8>(texel));
|
||||
const u8 g = Colour::convert6To8Bit(getBits<5, 6, u8>(texel));
|
||||
const u8 r = Colour::convert5To8Bit(getBits<11, 5, u8>(texel));
|
||||
|
||||
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RG8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
constexpr u8 b = 0;
|
||||
u8 g = ptr[offset];
|
||||
u8 r = ptr[offset + 1];
|
||||
const u8 g = data[offset];
|
||||
const u8 r = data[offset + 1];
|
||||
|
||||
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGB8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 3);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
u8 b = ptr[offset];
|
||||
u8 g = ptr[offset + 1];
|
||||
u8 r = ptr[offset + 2];
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 3);
|
||||
const u8 b = data[offset];
|
||||
const u8 g = data[offset + 1];
|
||||
const u8 r = data[offset + 2];
|
||||
|
||||
return (0xff << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::RGBA8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 4);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
u8 alpha = ptr[offset];
|
||||
u8 b = ptr[offset + 1];
|
||||
u8 g = ptr[offset + 2];
|
||||
u8 r = ptr[offset + 3];
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 4);
|
||||
const u8 alpha = data[offset];
|
||||
const u8 b = data[offset + 1];
|
||||
const u8 g = data[offset + 2];
|
||||
const u8 r = data[offset + 3];
|
||||
|
||||
return (alpha << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
case PICA::TextureFmt::IA4: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
const u8 texel = ptr[offset];
|
||||
const u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
const u8 texel = data[offset];
|
||||
const u8 alpha = Colour::convert4To8Bit(texel & 0xf);
|
||||
const u8 intensity = Colour::convert4To8Bit(texel >> 4);
|
||||
|
||||
|
@ -199,11 +188,10 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
|
||||
case PICA::TextureFmt::A4: {
|
||||
u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
const u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||
|
||||
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||
u8 alpha = ptr[offset] >> ((u % 2) ? 4 : 0);
|
||||
u8 alpha = data[offset] >> ((u % 2) ? 4 : 0);
|
||||
alpha = Colour::convert4To8Bit(getBits<0, 4>(alpha));
|
||||
|
||||
// A4 sets RGB to 0
|
||||
|
@ -212,8 +200,7 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
|
||||
case PICA::TextureFmt::A8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
const u8 alpha = ptr[offset];
|
||||
const u8 alpha = data[offset];
|
||||
|
||||
// A8 sets RGB to 0
|
||||
return (alpha << 24) | (0 << 16) | (0 << 8) | 0;
|
||||
|
@ -221,10 +208,9 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
|
||||
case PICA::TextureFmt::I4: {
|
||||
u32 offset = getSwizzledOffset_4bpp(u, v, size.u());
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
// For odd U coordinates, grab the top 4 bits, and the low 4 bits for even coordinates
|
||||
u8 intensity = ptr[offset] >> ((u % 2) ? 4 : 0);
|
||||
u8 intensity = data[offset] >> ((u % 2) ? 4 : 0);
|
||||
intensity = Colour::convert4To8Bit(getBits<0, 4>(intensity));
|
||||
|
||||
// Intensity formats just copy the intensity value to every colour channel
|
||||
|
@ -233,8 +219,7 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
|
||||
case PICA::TextureFmt::I8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 1);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
const u8 intensity = ptr[offset];
|
||||
const u8 intensity = data[offset];
|
||||
|
||||
// Intensity formats just copy the intensity value to every colour channel
|
||||
return (0xff << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||
|
@ -242,11 +227,10 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
|
||||
case PICA::TextureFmt::IA8: {
|
||||
u32 offset = getSwizzledOffset(u, v, size.u(), 2);
|
||||
auto ptr = static_cast<const u8*>(data);
|
||||
|
||||
// Same as I8 except each pixel gets its own alpha value too
|
||||
const u8 alpha = ptr[offset];
|
||||
const u8 intensity = ptr[offset + 1];
|
||||
const u8 alpha = data[offset];
|
||||
const u8 intensity = data[offset + 1];
|
||||
return (alpha << 24) | (intensity << 16) | (intensity << 8) | intensity;
|
||||
}
|
||||
|
||||
|
@ -258,7 +242,7 @@ u32 Texture::decodeTexel(u32 u, u32 v, PICA::TextureFmt fmt, const void* data) {
|
|||
}
|
||||
}
|
||||
|
||||
void Texture::decodeTexture(const void* data) {
|
||||
void Texture::decodeTexture(std::span<const u8> data) {
|
||||
std::vector<u32> decoded;
|
||||
decoded.reserve(u64(size.u()) * u64(size.v()));
|
||||
|
||||
|
@ -272,4 +256,4 @@ void Texture::decodeTexture(const void* data) {
|
|||
|
||||
texture.bind();
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, size.u(), size.v(), GL_RGBA, GL_UNSIGNED_BYTE, decoded.data());
|
||||
}
|
||||
}
|
||||
|
|
12
src/core/renderer_null/renderer_null.cpp
Normal file
12
src/core/renderer_null/renderer_null.cpp
Normal file
|
@ -0,0 +1,12 @@
|
|||
#include "renderer_null/renderer_null.hpp"
|
||||
|
||||
// Constructs the null renderer by forwarding the GPU reference and the internal register array to the Renderer base.
// The constructor body is empty, so no additional state is set up here.
RendererNull::RendererNull(GPU& gpu, const std::array<u32, regNum>& internalRegs) : Renderer(gpu, internalRegs) {}
|
||||
// Destructor with an empty body: the null renderer performs no explicit cleanup.
RendererNull::~RendererNull() {}
|
||||
|
||||
// No-op: the null renderer does nothing on reset.
void RendererNull::reset() {}
|
||||
// No-op: the null renderer presents nothing.
void RendererNull::display() {}
|
||||
// No-op: the null renderer creates no graphics resources.
void RendererNull::initGraphicsContext() {}
|
||||
// No-op: all four arguments are ignored and no buffer is cleared.
void RendererNull::clearBuffer(u32 startAddress, u32 endAddress, u32 value, u32 control) {}
|
||||
// No-op: all arguments are ignored and no transfer is performed.
void RendererNull::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32 outputSize, u32 flags) {}
|
||||
// No-op: the primitive type and vertex span are ignored and nothing is drawn.
void RendererNull::drawVertices(PICA::PrimType primType, std::span<const PICA::Vertex> vertices) {}
|
||||
// No-op: the file name is ignored and no screenshot is written.
void RendererNull::screenshot(const std::string& name) {}
|
179
src/emulator.cpp
179
src/emulator.cpp
|
@ -1,6 +1,8 @@
|
|||
#include "emulator.hpp"
|
||||
|
||||
#include <stb_image_write.h>
|
||||
#ifdef PANDA3DS_ENABLE_OPENGL
|
||||
#include <glad/gl.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
|
@ -12,7 +14,9 @@ __declspec(dllexport) DWORD AmdPowerXpressRequestHighPerformance = 1;
|
|||
}
|
||||
#endif
|
||||
|
||||
Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, gl, config), memory(cpu.getTicksRef()) {
|
||||
Emulator::Emulator()
|
||||
: config(std::filesystem::current_path() / "config.toml"), kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory, config),
|
||||
memory(cpu.getTicksRef()), cheats(memory, kernel.getServiceManager().getHID()) {
|
||||
if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_EVENTS) < 0) {
|
||||
Helpers::panic("Failed to initialize SDL2");
|
||||
}
|
||||
|
@ -23,25 +27,29 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory
|
|||
Helpers::warn("Failed to initialize SDL2 GameController: %s", SDL_GetError());
|
||||
}
|
||||
|
||||
// Request OpenGL 4.1 Core (Max available on MacOS)
|
||||
// MacOS gets mad if we don't explicitly demand a core profile
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1);
|
||||
window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, width, height, SDL_WINDOW_OPENGL);
|
||||
#ifdef PANDA3DS_ENABLE_OPENGL
|
||||
if (config.rendererType == RendererType::OpenGL) {
|
||||
// Request OpenGL 4.1 Core (Max available on MacOS)
|
||||
// MacOS gets mad if we don't explicitly demand a core profile
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
|
||||
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 1);
|
||||
window = SDL_CreateWindow("Alber", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, width, height, SDL_WINDOW_OPENGL);
|
||||
|
||||
if (window == nullptr) {
|
||||
Helpers::panic("Window creation failed: %s", SDL_GetError());
|
||||
}
|
||||
if (window == nullptr) {
|
||||
Helpers::panic("Window creation failed: %s", SDL_GetError());
|
||||
}
|
||||
|
||||
glContext = SDL_GL_CreateContext(window);
|
||||
if (glContext == nullptr) {
|
||||
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
|
||||
}
|
||||
glContext = SDL_GL_CreateContext(window);
|
||||
if (glContext == nullptr) {
|
||||
Helpers::panic("OpenGL context creation failed: %s", SDL_GetError());
|
||||
}
|
||||
|
||||
if (!gladLoadGL(reinterpret_cast<GLADloadfunc>(SDL_GL_GetProcAddress))) {
|
||||
Helpers::panic("OpenGL init failed: %s", SDL_GetError());
|
||||
if (!gladLoadGL(reinterpret_cast<GLADloadfunc>(SDL_GL_GetProcAddress))) {
|
||||
Helpers::panic("OpenGL init failed: %s", SDL_GetError());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (SDL_WasInit(SDL_INIT_GAMECONTROLLER)) {
|
||||
gameController = SDL_GameControllerOpen(0);
|
||||
|
@ -52,7 +60,6 @@ Emulator::Emulator() : kernel(cpu, memory, gpu), cpu(memory, kernel), gpu(memory
|
|||
}
|
||||
}
|
||||
|
||||
config.load(std::filesystem::current_path() / "config.toml");
|
||||
reset(ReloadOption::NoReload);
|
||||
}
|
||||
|
||||
|
@ -69,6 +76,12 @@ void Emulator::reset(ReloadOption reload) {
|
|||
// Otherwise resetting the kernel or cpu might nuke them
|
||||
cpu.setReg(13, VirtualAddrs::StackTop); // Set initial SP
|
||||
|
||||
// We're resetting without reloading the ROM, so yeet cheats
|
||||
if (reload == ReloadOption::NoReload) {
|
||||
haveCheats = false;
|
||||
cheats.reset();
|
||||
}
|
||||
|
||||
// If a ROM is active and we reset, with the reload option enabled then reload it.
|
||||
// This is necessary to set up stack, executable memory, .data/.rodata/.bss all over again
|
||||
if (reload == ReloadOption::Reload && romType != ROMType::None && romPath.has_value()) {
|
||||
|
@ -91,19 +104,8 @@ void Emulator::run() {
|
|||
#endif
|
||||
|
||||
while (running) {
|
||||
ServiceManager& srv = kernel.getServiceManager();
|
||||
|
||||
if (romType != ROMType::None) {
|
||||
#ifdef PANDA3DS_ENABLE_HTTP_SERVER
|
||||
pollHttpServer();
|
||||
#endif
|
||||
runFrame(); // Run 1 frame of instructions
|
||||
gpu.display(); // Display graphics
|
||||
|
||||
// Send VBlank interrupts
|
||||
srv.sendGPUInterrupt(GPUInterrupt::VBlank0);
|
||||
srv.sendGPUInterrupt(GPUInterrupt::VBlank1);
|
||||
}
|
||||
runFrame();
|
||||
HIDService& hid = kernel.getServiceManager().getHID();
|
||||
|
||||
SDL_Event event;
|
||||
while (SDL_PollEvent(&event)) {
|
||||
|
@ -119,41 +121,41 @@ void Emulator::run() {
|
|||
if (romType == ROMType::None) break;
|
||||
|
||||
switch (event.key.keysym.sym) {
|
||||
case SDLK_l: srv.pressKey(Keys::A); break;
|
||||
case SDLK_k: srv.pressKey(Keys::B); break;
|
||||
case SDLK_o: srv.pressKey(Keys::X); break;
|
||||
case SDLK_i: srv.pressKey(Keys::Y); break;
|
||||
case SDLK_l: hid.pressKey(Keys::A); break;
|
||||
case SDLK_k: hid.pressKey(Keys::B); break;
|
||||
case SDLK_o: hid.pressKey(Keys::X); break;
|
||||
case SDLK_i: hid.pressKey(Keys::Y); break;
|
||||
|
||||
case SDLK_q: srv.pressKey(Keys::L); break;
|
||||
case SDLK_p: srv.pressKey(Keys::R); break;
|
||||
case SDLK_q: hid.pressKey(Keys::L); break;
|
||||
case SDLK_p: hid.pressKey(Keys::R); break;
|
||||
|
||||
case SDLK_RIGHT: srv.pressKey(Keys::Right); break;
|
||||
case SDLK_LEFT: srv.pressKey(Keys::Left); break;
|
||||
case SDLK_UP: srv.pressKey(Keys::Up); break;
|
||||
case SDLK_DOWN: srv.pressKey(Keys::Down); break;
|
||||
case SDLK_RIGHT: hid.pressKey(Keys::Right); break;
|
||||
case SDLK_LEFT: hid.pressKey(Keys::Left); break;
|
||||
case SDLK_UP: hid.pressKey(Keys::Up); break;
|
||||
case SDLK_DOWN: hid.pressKey(Keys::Down); break;
|
||||
|
||||
case SDLK_w:
|
||||
srv.setCirclepadY(0x9C);
|
||||
hid.setCirclepadY(0x9C);
|
||||
keyboardAnalogY = true;
|
||||
break;
|
||||
|
||||
case SDLK_a:
|
||||
srv.setCirclepadX(-0x9C);
|
||||
hid.setCirclepadX(-0x9C);
|
||||
keyboardAnalogX = true;
|
||||
break;
|
||||
|
||||
case SDLK_s:
|
||||
srv.setCirclepadY(-0x9C);
|
||||
hid.setCirclepadY(-0x9C);
|
||||
keyboardAnalogY = true;
|
||||
break;
|
||||
|
||||
case SDLK_d:
|
||||
srv.setCirclepadX(0x9C);
|
||||
hid.setCirclepadX(0x9C);
|
||||
keyboardAnalogX = true;
|
||||
break;
|
||||
|
||||
case SDLK_RETURN: srv.pressKey(Keys::Start); break;
|
||||
case SDLK_BACKSPACE: srv.pressKey(Keys::Select); break;
|
||||
case SDLK_RETURN: hid.pressKey(Keys::Start); break;
|
||||
case SDLK_BACKSPACE: hid.pressKey(Keys::Select); break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -161,34 +163,34 @@ void Emulator::run() {
|
|||
if (romType == ROMType::None) break;
|
||||
|
||||
switch (event.key.keysym.sym) {
|
||||
case SDLK_l: srv.releaseKey(Keys::A); break;
|
||||
case SDLK_k: srv.releaseKey(Keys::B); break;
|
||||
case SDLK_o: srv.releaseKey(Keys::X); break;
|
||||
case SDLK_i: srv.releaseKey(Keys::Y); break;
|
||||
case SDLK_l: hid.releaseKey(Keys::A); break;
|
||||
case SDLK_k: hid.releaseKey(Keys::B); break;
|
||||
case SDLK_o: hid.releaseKey(Keys::X); break;
|
||||
case SDLK_i: hid.releaseKey(Keys::Y); break;
|
||||
|
||||
case SDLK_q: srv.releaseKey(Keys::L); break;
|
||||
case SDLK_p: srv.releaseKey(Keys::R); break;
|
||||
case SDLK_q: hid.releaseKey(Keys::L); break;
|
||||
case SDLK_p: hid.releaseKey(Keys::R); break;
|
||||
|
||||
case SDLK_RIGHT: srv.releaseKey(Keys::Right); break;
|
||||
case SDLK_LEFT: srv.releaseKey(Keys::Left); break;
|
||||
case SDLK_UP: srv.releaseKey(Keys::Up); break;
|
||||
case SDLK_DOWN: srv.releaseKey(Keys::Down); break;
|
||||
case SDLK_RIGHT: hid.releaseKey(Keys::Right); break;
|
||||
case SDLK_LEFT: hid.releaseKey(Keys::Left); break;
|
||||
case SDLK_UP: hid.releaseKey(Keys::Up); break;
|
||||
case SDLK_DOWN: hid.releaseKey(Keys::Down); break;
|
||||
|
||||
// Err this is probably not ideal
|
||||
case SDLK_w:
|
||||
case SDLK_s:
|
||||
srv.setCirclepadY(0);
|
||||
hid.setCirclepadY(0);
|
||||
keyboardAnalogY = false;
|
||||
break;
|
||||
|
||||
case SDLK_a:
|
||||
case SDLK_d:
|
||||
srv.setCirclepadX(0);
|
||||
hid.setCirclepadX(0);
|
||||
keyboardAnalogX = false;
|
||||
break;
|
||||
|
||||
case SDLK_RETURN: srv.releaseKey(Keys::Start); break;
|
||||
case SDLK_BACKSPACE: srv.releaseKey(Keys::Select); break;
|
||||
case SDLK_RETURN: hid.releaseKey(Keys::Start); break;
|
||||
case SDLK_BACKSPACE: hid.releaseKey(Keys::Select); break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -205,9 +207,9 @@ void Emulator::run() {
|
|||
u16 x_converted = static_cast<u16>(x) - 40;
|
||||
u16 y_converted = static_cast<u16>(y) - 240;
|
||||
|
||||
srv.setTouchScreenPress(x_converted, y_converted);
|
||||
hid.setTouchScreenPress(x_converted, y_converted);
|
||||
} else {
|
||||
srv.releaseTouchScreen();
|
||||
hid.releaseTouchScreen();
|
||||
}
|
||||
} else if (event.button.button == SDL_BUTTON_RIGHT) {
|
||||
holdingRightClick = true;
|
||||
|
@ -219,7 +221,7 @@ void Emulator::run() {
|
|||
if (romType == ROMType::None) break;
|
||||
|
||||
if (event.button.button == SDL_BUTTON_LEFT) {
|
||||
srv.releaseTouchScreen();
|
||||
hid.releaseTouchScreen();
|
||||
} else if (event.button.button == SDL_BUTTON_RIGHT) {
|
||||
holdingRightClick = false;
|
||||
}
|
||||
|
@ -262,9 +264,9 @@ void Emulator::run() {
|
|||
|
||||
if (key != 0) {
|
||||
if (event.cbutton.state == SDL_PRESSED) {
|
||||
srv.pressKey(key);
|
||||
hid.pressKey(key);
|
||||
} else {
|
||||
srv.releaseKey(key);
|
||||
hid.releaseKey(key);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -283,8 +285,8 @@ void Emulator::run() {
|
|||
// So up until then, we will set the gyroscope euler angles to fixed values based on the direction of the relative motion
|
||||
const s32 roll = motionX > 0 ? 0x7f : -0x7f;
|
||||
const s32 pitch = motionY > 0 ? 0x7f : -0x7f;
|
||||
srv.setRoll(roll);
|
||||
srv.setPitch(pitch);
|
||||
hid.setRoll(roll);
|
||||
hid.setPitch(pitch);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -311,19 +313,19 @@ void Emulator::run() {
|
|||
|
||||
// Avoid overriding the keyboard's circlepad input
|
||||
if (abs(stickX) < deadzone && !keyboardAnalogX) {
|
||||
srv.setCirclepadX(0);
|
||||
hid.setCirclepadX(0);
|
||||
} else {
|
||||
srv.setCirclepadX(stickX / div);
|
||||
hid.setCirclepadX(stickX / div);
|
||||
}
|
||||
|
||||
if (abs(stickY) < deadzone && !keyboardAnalogY) {
|
||||
srv.setCirclepadY(0);
|
||||
hid.setCirclepadY(0);
|
||||
} else {
|
||||
srv.setCirclepadY(-(stickY / div));
|
||||
hid.setCirclepadY(-(stickY / div));
|
||||
}
|
||||
}
|
||||
|
||||
srv.updateInputs(cpu.getTicks());
|
||||
hid.updateInputs(cpu.getTicks());
|
||||
}
|
||||
|
||||
// Update inputs in the HID module
|
||||
|
@ -331,7 +333,24 @@ void Emulator::run() {
|
|||
}
|
||||
}
|
||||
|
||||
void Emulator::runFrame() { cpu.runFrame(); }
|
||||
void Emulator::runFrame() {
|
||||
if (romType != ROMType::None) {
|
||||
#ifdef PANDA3DS_ENABLE_HTTP_SERVER
|
||||
pollHttpServer();
|
||||
#endif
|
||||
cpu.runFrame(); // Run 1 frame of instructions
|
||||
gpu.display(); // Display graphics
|
||||
|
||||
// Send VBlank interrupts
|
||||
ServiceManager& srv = kernel.getServiceManager();
|
||||
srv.sendGPUInterrupt(GPUInterrupt::VBlank0);
|
||||
srv.sendGPUInterrupt(GPUInterrupt::VBlank1);
|
||||
|
||||
if (haveCheats) [[unlikely]] {
|
||||
cheats.run();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Emulator::loadROM(const std::filesystem::path& path) {
|
||||
// Reset the emulator if we've already loaded a ROM
|
||||
|
@ -427,15 +446,13 @@ bool Emulator::loadELF(std::ifstream& file) {
|
|||
}
|
||||
|
||||
// Reset our graphics context and initialize the GPU's graphics context
|
||||
void Emulator::initGraphicsContext() {
|
||||
gl.reset(); // TODO (For when we have multiple backends): Only do this if we are using OpenGL
|
||||
gpu.initGraphicsContext();
|
||||
}
|
||||
// Thin wrapper: graphics context initialization is delegated entirely to the GPU object
void Emulator::initGraphicsContext() { gpu.initGraphicsContext(); }
|
||||
|
||||
#ifdef PANDA3DS_ENABLE_HTTP_SERVER
|
||||
void Emulator::pollHttpServer() {
|
||||
std::scoped_lock lock(httpServer.actionMutex);
|
||||
ServiceManager& srv = kernel.getServiceManager();
|
||||
|
||||
HIDService& hid = kernel.getServiceManager().getHID();
|
||||
|
||||
if (httpServer.pendingAction) {
|
||||
switch (httpServer.action) {
|
||||
|
@ -443,14 +460,14 @@ void Emulator::pollHttpServer() {
|
|||
|
||||
case HttpAction::PressKey:
|
||||
if (httpServer.pendingKey != 0) {
|
||||
srv.pressKey(httpServer.pendingKey);
|
||||
hid.pressKey(httpServer.pendingKey);
|
||||
httpServer.pendingKey = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case HttpAction::ReleaseKey:
|
||||
if (httpServer.pendingKey != 0) {
|
||||
srv.releaseKey(httpServer.pendingKey);
|
||||
hid.releaseKey(httpServer.pendingKey);
|
||||
httpServer.pendingKey = 0;
|
||||
}
|
||||
break;
|
||||
|
|
8
src/host_shaders/opengl_display.frag
Normal file
8
src/host_shaders/opengl_display.frag
Normal file
|
@ -0,0 +1,8 @@
|
|||
#version 410 core
|
||||
in vec2 UV;
|
||||
out vec4 FragColor;
|
||||
|
||||
uniform sampler2D u_texture;
|
||||
// Outputs the texel of u_texture found at the interpolated UV coordinate, unmodified
void main() {
	vec4 texel = texture(u_texture, UV);
	FragColor = texel;
}
|
23
src/host_shaders/opengl_display.vert
Normal file
23
src/host_shaders/opengl_display.vert
Normal file
|
@ -0,0 +1,23 @@
|
|||
#version 410 core
|
||||
out vec2 UV;
|
||||
|
||||
// Emits one corner of a quad spanning [-1, 1] on both clip-space axes, selected by gl_VertexID,
// together with the texture coordinate that belongs to that corner
void main() {
	const vec4 quadCorners[4] = vec4[](
		vec4(-1.0, 1.0, 1.0, 1.0),   // Top-left
		vec4(1.0, 1.0, 1.0, 1.0),    // Top-right
		vec4(-1.0, -1.0, 1.0, 1.0),  // Bottom-left
		vec4(1.0, -1.0, 1.0, 1.0)    // Bottom-right
	);

	// The 3DS displays both screens' framebuffer rotated 90 deg counter clockwise
	// So the texture coordinates below are rotated to compensate
	const vec2 quadUVs[4] = vec2[](
		vec2(1.0, 1.0),  // Top-right
		vec2(1.0, 0.0),  // Bottom-right
		vec2(0.0, 1.0),  // Top-left
		vec2(0.0, 0.0)   // Bottom-left
	);

	int corner = gl_VertexID;
	UV = quadUVs[corner];
	gl_Position = quadCorners[corner];
}
|
409
src/host_shaders/opengl_fragment_shader.frag
Normal file
409
src/host_shaders/opengl_fragment_shader.frag
Normal file
|
@ -0,0 +1,409 @@
|
|||
#version 410 core
|
||||
|
||||
in vec3 v_tangent;
|
||||
in vec3 v_normal;
|
||||
in vec3 v_bitangent;
|
||||
in vec4 v_colour;
|
||||
in vec3 v_texcoord0;
|
||||
in vec2 v_texcoord1;
|
||||
in vec3 v_view;
|
||||
in vec2 v_texcoord2;
|
||||
flat in vec4 v_textureEnvColor[6];
|
||||
flat in vec4 v_textureEnvBufferColor;
|
||||
|
||||
out vec4 fragColour;
|
||||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvSource[6];
|
||||
uniform uint u_textureEnvOperand[6];
|
||||
uniform uint u_textureEnvCombiner[6];
|
||||
uniform uint u_textureEnvScale[6];
|
||||
|
||||
// Depth control uniforms
|
||||
uniform float u_depthScale;
|
||||
uniform float u_depthOffset;
|
||||
uniform bool u_depthmapEnable;
|
||||
|
||||
uniform sampler2D u_tex0;
|
||||
uniform sampler2D u_tex1;
|
||||
uniform sampler2D u_tex2;
|
||||
uniform sampler1DArray u_tex_lighting_lut;
|
||||
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
// Helper so that the implementation of u_pica_regs can be changed later
|
||||
// u_picaRegs holds 0x200 - 0x48 entries and its first element corresponds to register address 0x48, hence the offset
uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48]; }
|
||||
|
||||
vec4 tevSources[16];
|
||||
vec4 tevNextPreviousBuffer;
|
||||
bool tevUnimplementedSourceFlag = false;
|
||||
|
||||
// OpenGL ES 1.1 reference pages for TEVs (this is what the PICA200 implements):
|
||||
// https://registry.khronos.org/OpenGL-Refpages/es1.1/xhtml/glTexEnv.xml
|
||||
|
||||
// Returns the entry of tevSources selected by src_id.
// Fetching any source ID in the range [6, 12] additionally raises tevUnimplementedSourceFlag.
vec4 tevFetchSource(uint src_id) {
	bool implemented = src_id < 6u || src_id >= 13u;
	if (!implemented) {
		tevUnimplementedSourceFlag = true;
	}

	return tevSources[src_id];
}
|
||||
|
||||
// Builds one operand of a TEV stage: fetches the colour and alpha sources selected for slot src_id of
// stage tev_id and applies the colour/alpha operand modifiers configured for that slot.
//   tev_id: TEV stage index, used to index the u_textureEnv* uniform arrays
//   src_id: operand slot within the stage (the combiner requests slots 0, 1 and 2)
// Returns the modified colour in .rgb and the modified alpha in .a
vec4 tevGetColorAndAlphaSource(int tev_id, int src_id) {
	// Start from a defined value: colour operand encodings without a case below (6, 7, 10, 11, 14, 15)
	// would otherwise return uninitialized data. Opaque white is the same default tevCalculateCombiner uses
	vec4 result = vec4(1.0);

	// Colour source selectors live in the low 16 bits (4 bits per slot), alpha selectors in the high 16 bits
	vec4 colorSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4)) & 15u);
	vec4 alphaSource = tevFetchSource((u_textureEnvSource[tev_id] >> (src_id * 4 + 16)) & 15u);

	// Colour operands are 4 bits wide, alpha operands only 3 bits wide
	uint colorOperand = (u_textureEnvOperand[tev_id] >> (src_id * 4)) & 15u;
	uint alphaOperand = (u_textureEnvOperand[tev_id] >> (12 + src_id * 4)) & 7u;

	// TODO: figure out what the undocumented values do
	switch (colorOperand) {
		case 0u: result.rgb = colorSource.rgb; break;             // Source color
		case 1u: result.rgb = 1.0 - colorSource.rgb; break;       // One minus source color
		case 2u: result.rgb = vec3(colorSource.a); break;         // Source alpha
		case 3u: result.rgb = vec3(1.0 - colorSource.a); break;   // One minus source alpha
		case 4u: result.rgb = vec3(colorSource.r); break;         // Source red
		case 5u: result.rgb = vec3(1.0 - colorSource.r); break;   // One minus source red
		case 8u: result.rgb = vec3(colorSource.g); break;         // Source green
		case 9u: result.rgb = vec3(1.0 - colorSource.g); break;   // One minus source green
		case 12u: result.rgb = vec3(colorSource.b); break;        // Source blue
		case 13u: result.rgb = vec3(1.0 - colorSource.b); break;  // One minus source blue
		default: break;                                           // Keeps the default set above
	}

	// All 8 possible values of the 3-bit alpha operand are handled
	switch (alphaOperand) {
		case 0u: result.a = alphaSource.a; break;        // Source alpha
		case 1u: result.a = 1.0 - alphaSource.a; break;  // One minus source alpha
		case 2u: result.a = alphaSource.r; break;        // Source red
		case 3u: result.a = 1.0 - alphaSource.r; break;  // One minus source red
		case 4u: result.a = alphaSource.g; break;        // Source green
		case 5u: result.a = 1.0 - alphaSource.g; break;  // One minus source green
		case 6u: result.a = alphaSource.b; break;        // Source blue
		case 7u: result.a = 1.0 - alphaSource.b; break;  // One minus source blue
		default: break;
	}

	return result;
}
|
||||
|
||||
// Evaluates TEV stage tev_id: combines its three operands according to the stage's colour and alpha
// combiner modes, then applies the stage's colour and alpha scale factors.
vec4 tevCalculateCombiner(int tev_id) {
	vec4 a = tevGetColorAndAlphaSource(tev_id, 0);
	vec4 b = tevGetColorAndAlphaSource(tev_id, 1);
	vec4 c = tevGetColorAndAlphaSource(tev_id, 2);

	uint combinerReg = u_textureEnvCombiner[tev_id];
	uint scaleReg = u_textureEnvScale[tev_id];

	uint rgbMode = combinerReg & 15u;
	uint alphaMode = (combinerReg >> 16) & 15u;

	// Modes without a case below leave the corresponding channels at 1.0
	vec4 combined = vec4(1.0);

	// TODO: figure out what the undocumented values do
	switch (rgbMode) {
		case 0u: combined.rgb = a.rgb; break;                                         // Replace
		case 1u: combined.rgb = a.rgb * b.rgb; break;                                 // Modulate
		case 2u: combined.rgb = min(vec3(1.0), a.rgb + b.rgb); break;                 // Add
		case 3u: combined.rgb = clamp(a.rgb + b.rgb - 0.5, 0.0, 1.0); break;          // Add signed
		case 4u: combined.rgb = mix(b.rgb, a.rgb, c.rgb); break;                      // Interpolate
		case 5u: combined.rgb = max(a.rgb - b.rgb, 0.0); break;                       // Subtract
		case 6u: combined.rgb = vec3(4.0 * dot(a.rgb - 0.5, b.rgb - 0.5)); break;     // Dot3 RGB
		case 7u: combined = vec4(4.0 * dot(a.rgb - 0.5, b.rgb - 0.5)); break;         // Dot3 RGBA
		case 8u: combined.rgb = min(a.rgb * b.rgb + c.rgb, 1.0); break;               // Multiply then add
		case 9u: combined.rgb = min((a.rgb + b.rgb) * c.rgb, 1.0); break;             // Add then multiply
		default: break;
	}

	// In "Dot3 RGBA" mode the color combiner has already written the alpha channel
	bool colorWroteAlpha = (rgbMode == 7u);
	if (!colorWroteAlpha) {
		// TODO: figure out what the undocumented values do
		// TODO: test if the alpha combiner supports all the same modes as the color combiner.
		switch (alphaMode) {
			case 0u: combined.a = a.a; break;                             // Replace
			case 1u: combined.a = a.a * b.a; break;                       // Modulate
			case 2u: combined.a = min(1.0, a.a + b.a); break;             // Add
			case 3u: combined.a = clamp(a.a + b.a - 0.5, 0.0, 1.0); break;  // Add signed
			case 4u: combined.a = mix(b.a, a.a, c.a); break;              // Interpolate
			case 5u: combined.a = max(0.0, a.a - b.a); break;             // Subtract
			case 8u: combined.a = min(1.0, a.a * b.a + c.a); break;       // Multiply then add
			case 9u: combined.a = min(1.0, (a.a + b.a) * c.a); break;     // Add then multiply
			default: break;
		}
	}

	// Scale factors are powers of two: the exponent is a 2-bit field (bits 0-1 for colour, bits 16-17 for alpha)
	float rgbScale = float(1 << (scaleReg & 3u));
	float alphaScale = float(1 << ((scaleReg >> 16) & 3u));
	combined.rgb *= rgbScale;
	combined.a *= alphaScale;

	return combined;
}
|
||||
|
||||
#define D0_LUT 0u
|
||||
#define D1_LUT 1u
|
||||
#define SP_LUT 2u
|
||||
#define FR_LUT 3u
|
||||
#define RB_LUT 4u
|
||||
#define RG_LUT 5u
|
||||
#define RR_LUT 6u
|
||||
|
||||
// Samples one lighting lookup table.
//   lut:   which table to read, one of the *_LUT constants defined above
//   light: ID of the light being evaluated; only used to pick the per-light spotlight table
//   value: lookup coordinate along the table
// Returns the red channel of the sampled texel.
float lutLookup(uint lut, uint light, float value) {
	// The two remappings must be mutually exclusive: FR_LUT - 1 equals SP_LUT, so applying them one after
	// the other would send every Fresnel lookup to the spotlight table of the current light
	if (lut == SP_LUT) {
		// Spotlight tables are per-light and occupy the layers starting at 8
		lut = light + 8u;
	} else if (lut >= FR_LUT && lut <= RR_LUT) {
		// FR/RB/RG/RR sit one layer below their *_LUT constant
		// NOTE(review): layer layout is taken from this remapping only - confirm against the code that uploads u_tex_lighting_lut
		lut -= 1u;
	}

	return texture(u_tex_lighting_lut, vec2(value, lut)).r;
}
|
||||
|
||||
// Unpacks a colour register into a vec3 with each channel normalized to [0.0, 1.0].
// Each channel is 8 bits wide: red starts at bit 20, green at bit 10 and blue at bit 0.
vec3 regToColor(uint reg) {
	float red = float(bitfieldExtract(reg, 20, 8));
	float green = float(bitfieldExtract(reg, 10, 8));
	float blue = float(bitfieldExtract(reg, 0, 8));

	// Normalization scale to convert from [0...255] to [0.0...1.0]
	const float scale = 1.0 / 255.0;
	return scale * vec3(red, green, blue);
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
// Converts a float with 1 sign bit, E exponent bits and M mantissa bits (stored in the low bits of hex) to an f32.
// The exponent is re-biased for f32, an all-ones exponent maps to the f32 all-ones exponent (255),
// and a value whose exponent and mantissa bits are all zero decodes to a signed zero.
float decodeFP(uint hex, uint E, uint M) {
	uint exponentMask = (1u << E) - 1u;
	uint mantissaMask = (1u << M) - 1u;
	uint magnitudeMask = (1u << (M + E)) - 1u;  // Every bit below the sign bit

	uint signBit = (hex >> (E + M)) << 31u;

	// Exponent and mantissa both zero: only the sign survives
	if ((hex & magnitudeMask) == 0u) {
		return uintBitsToFloat(signBit);
	}

	uint exponent = (hex >> M) & exponentMask;
	uint mantissa = hex & mantissaMask;

	// Difference between the f32 exponent bias and the bias of the source format
	uint bias = 128u - (1u << (E - 1u));
	uint f32Exponent = (exponent == exponentMask) ? 255u : exponent + bias;

	return uintBitsToFloat(signBit | (mantissa << (23u - M)) | (f32Exponent << 23u));
}
|
||||
|
||||
// Implements the following algorthm: https://mathb.in/26766
|
||||
// Computes the primary and secondary fragment lighting colours from the PICA lighting registers.
// With lighting disabled both outputs are opaque white. If a feature this shader does not implement is
// requested (a light with config bit 0 clear, or LUT input 5), both outputs are forced to magenta instead.
void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
	// Quaternions describe a transformation from surface-local space to eye space.
	// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
	// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
	vec3 normal = normalize(v_normal);
	vec3 tangent = normalize(v_tangent);
	vec3 bitangent = normalize(v_bitangent);
	vec3 view = normalize(v_view);

	uint lighting_enable = readPicaReg(0x008F);
	if (bitfieldExtract(lighting_enable, 0, 1) == 0u) {
		primary_color = secondary_color = vec4(1.0);
		return;
	}

	uint global_ambient = readPicaReg(0x01C0);
	uint num_lights = (readPicaReg(0x01C2) & 0x7u) + 1u;
	uint light_permutation = readPicaReg(0x01D9);

	uint lutinput_abs = readPicaReg(0x01D0);
	uint lutinput_select = readPicaReg(0x01D1);
	uint lutinput_scale = readPicaReg(0x01D2);
	uint lighting_config0 = readPicaReg(0x01C3);
	uint lighting_config1 = readPicaReg(0x01C4);

	primary_color = vec4(vec3(0.0), 1.0);
	secondary_color = vec4(vec3(0.0), 1.0);
	primary_color.rgb += regToColor(global_ambient);

	// One LUT output per *_LUT constant; overwritten for every light
	float d[7];
	bool error_unimpl = false;

	for (uint i = 0u; i < num_lights; i++) {
		uint light_id = bitfieldExtract(light_permutation, int(i * 3), 3);

		// Each light owns a block of 0x10 registers starting at 0x0140
		uint light_base = 0x0140 + 0x10 * light_id;
		uint light_specular0 = readPicaReg(light_base + 0x0u);
		uint light_specular1 = readPicaReg(light_base + 0x1u);
		uint light_diffuse = readPicaReg(light_base + 0x2u);
		uint light_ambient = readPicaReg(light_base + 0x3u);
		uint light_vector_low = readPicaReg(light_base + 0x4u);
		uint light_vector_high = readPicaReg(light_base + 0x5u);
		uint light_config = readPicaReg(light_base + 0x9u);

		float light_x = decodeFP(bitfieldExtract(light_vector_low, 0, 16), 5, 10);
		float light_y = decodeFP(bitfieldExtract(light_vector_low, 16, 16), 5, 10);
		float light_z = decodeFP(bitfieldExtract(light_vector_high, 0, 16), 5, 10);
		vec3 light_vector = normalize(vec3(light_x, light_y, light_z));

		// Positional Light
		if (bitfieldExtract(light_config, 0, 1) == 0u) {
			error_unimpl = true;
		}

		vec3 half_vector = normalize(normalize(light_vector) + view);

		for (int c = 0; c < 7; c++) {
			// A set bit in CONFIG1[16 + c] skips the lookup for this LUT
			if (bitfieldExtract(lighting_config1, 16 + c, 1) != 0u) {
				d[c] = 1.0;
				continue;
			}

			uint scale_id = bitfieldExtract(lutinput_scale, c * 4, 3);
			float scale = float(1u << scale_id);
			if (scale_id >= 6u) {
				scale /= 256.0;
			}

			uint input_id = bitfieldExtract(lutinput_select, c * 4, 3);
			switch (input_id) {
				case 0u: d[c] = dot(normal, half_vector); break;
				case 1u: d[c] = dot(view, half_vector); break;
				case 2u: d[c] = dot(normal, view); break;
				case 3u: d[c] = dot(light_vector, normal); break;

				case 4u: {
					uint spotdir_low = readPicaReg(light_base + 0x6u);
					uint spotdir_high = readPicaReg(light_base + 0x7u);
					vec3 spot_light_vector = normalize(vec3(
						decodeFP(bitfieldExtract(spotdir_low, 0, 16), 1, 11),
						decodeFP(bitfieldExtract(spotdir_low, 16, 16), 1, 11),
						decodeFP(bitfieldExtract(spotdir_high, 0, 16), 1, 11)
					));
					d[c] = dot(-light_vector, spot_light_vector);  // -L dot P (aka Spotlight aka SP);
					break;
				}

				case 5u:
					d[c] = 1.0;  // TODO: cos <greek symbol> (aka CP);
					error_unimpl = true;
					break;

				default: d[c] = 1.0; break;
			}

			// Remap the dot product from [-1, 1] to [0, 1] before using it as the lookup coordinate
			d[c] = lutLookup(uint(c), light_id, d[c] * 0.5 + 0.5) * scale;
			if (bitfieldExtract(lutinput_abs, 2 * c, 1) != 0u) {
				d[c] = abs(d[c]);
			}
		}

		// The per-light lookup configuration overrides some of the LUT outputs computed above
		uint lookup_config = bitfieldExtract(light_config, 4, 4);
		switch (lookup_config) {
			case 0u:
				d[D1_LUT] = 0.0;
				d[FR_LUT] = 0.0;
				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
				break;
			case 1u:
				d[D0_LUT] = 0.0;
				d[D1_LUT] = 0.0;
				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
				break;
			case 2u:
				d[FR_LUT] = 0.0;
				d[SP_LUT] = 0.0;
				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
				break;
			case 3u:
				d[SP_LUT] = 0.0;
				d[RG_LUT] = d[RB_LUT] = d[RR_LUT] = 1.0;
				break;
			case 4u: d[FR_LUT] = 0.0; break;
			case 5u: d[D1_LUT] = 0.0; break;
			case 6u: d[RG_LUT] = d[RB_LUT] = d[RR_LUT]; break;
			default: break;
		}

		float distance_factor = 1.0;  // a
		float indirect_factor = 1.0;  // fi
		float shadow_factor = 1.0;    // o

		float NdotL = dot(normal, light_vector);  // Li dot N

		// Two sided diffuse
		bool two_sided = bitfieldExtract(light_config, 1, 1) != 0u;
		NdotL = two_sided ? abs(NdotL) : max(0.0, NdotL);

		float light_factor = distance_factor * d[SP_LUT] * indirect_factor * shadow_factor;

		vec3 diffuse_term = regToColor(light_ambient) + regToColor(light_diffuse) * NdotL;
		vec3 specular_term = regToColor(light_specular0) * d[D0_LUT] +
							 regToColor(light_specular1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT]);

		primary_color.rgb += light_factor * diffuse_term;
		secondary_color.rgb += light_factor * specular_term;
	}

	// The Fresnel LUT output (as left by the last processed light) can replace either alpha channel
	uint fresnel_output1 = bitfieldExtract(lighting_config0, 2, 1);
	uint fresnel_output2 = bitfieldExtract(lighting_config0, 3, 1);

	if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
	if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];

	if (error_unimpl) {
		secondary_color = primary_color = vec4(1.0, 0., 1.0, 1.0);
	}
}
|
||||
|
||||
// Fragment entry point: fills the TEV source table, runs the 6 TEV stages in order, writes the
// fragment depth and finally applies the alpha test configured in register 0x104.
void main() {
	// TODO: what do invalid sources and disabled textures read as?
	// And what does the "previous combiner" source read initially?
	tevSources[0] = v_colour;  // Primary/vertex color
	calcLighting(tevSources[1], tevSources[2]);

	uint textureConfig = readPicaReg(0x80);
	// Bit 13 of the texture config selects which texcoord set texture unit 2 samples with
	vec2 tex2UV = (textureConfig & (1u << 13)) != 0u ? v_texcoord1 : v_texcoord2;

	// Bits 0-2 enable texture units 0-2; disabled units leave their source untouched
	if ((textureConfig & 1u) != 0u) tevSources[3] = texture(u_tex0, v_texcoord0.xy);
	if ((textureConfig & 2u) != 0u) tevSources[4] = texture(u_tex1, v_texcoord1);
	if ((textureConfig & 4u) != 0u) tevSources[5] = texture(u_tex2, tex2UV);
	tevSources[13] = vec4(0.0);  // Previous buffer
	tevSources[15] = vec4(0.0);  // Previous combiner

	tevNextPreviousBuffer = v_textureEnvBufferColor;
	uint textureEnvUpdateBuffer = readPicaReg(0xE0);

	for (int i = 0; i < 6; i++) {
		tevSources[14] = v_textureEnvColor[i];  // Constant color
		tevSources[15] = tevCalculateCombiner(i);
		tevSources[13] = tevNextPreviousBuffer;

		// Only the first 4 stages can update the buffer that later stages read as "previous buffer"
		if (i < 4) {
			if ((textureEnvUpdateBuffer & (0x100u << i)) != 0u) {
				tevNextPreviousBuffer.rgb = tevSources[15].rgb;
			}

			if ((textureEnvUpdateBuffer & (0x1000u << i)) != 0u) {
				tevNextPreviousBuffer.a = tevSources[15].a;
			}
		}
	}

	fragColour = tevSources[15];

	if (tevUnimplementedSourceFlag) {
		// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
	}
	// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;

	// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
	// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
	float z_over_w = gl_FragCoord.z * 2.0f - 1.0f;
	float depth = z_over_w * u_depthScale + u_depthOffset;

	if (!u_depthmapEnable)  // Divide z by w if depthmap enable == 0 (ie using W-buffering)
		depth /= gl_FragCoord.w;

	// Write final fragment depth
	gl_FragDepth = depth;

	// Perform alpha test
	uint alphaControl = readPicaReg(0x104);
	if ((alphaControl & 1u) != 0u) {  // Check if alpha test is on
		uint func = (alphaControl >> 4u) & 7u;
		float reference = float((alphaControl >> 8u) & 0xffu) / 255.0;
		float alpha = fragColour.a;

		// func is unsigned, so the case labels are unsigned too, like every other switch in this shader
		switch (func) {
			case 0u: discard;  // Never pass alpha test
			case 1u: break;    // Always pass alpha test
			case 2u:           // Pass if equal
				if (alpha != reference) discard;
				break;
			case 3u:  // Pass if not equal
				if (alpha == reference) discard;
				break;
			case 4u:  // Pass if less than
				if (alpha >= reference) discard;
				break;
			case 5u:  // Pass if less than or equal
				if (alpha > reference) discard;
				break;
			case 6u:  // Pass if greater than
				if (alpha <= reference) discard;
				break;
			case 7u:  // Pass if greater than or equal
				if (alpha < reference) discard;
				break;
		}
	}
}
|
97
src/host_shaders/opengl_vertex_shader.vert
Normal file
97
src/host_shaders/opengl_vertex_shader.vert
Normal file
|
@ -0,0 +1,97 @@
|
|||
#version 410 core
|
||||
|
||||
layout(location = 0) in vec4 a_coords;
|
||||
layout(location = 1) in vec4 a_quaternion;
|
||||
layout(location = 2) in vec4 a_vertexColour;
|
||||
layout(location = 3) in vec2 a_texcoord0;
|
||||
layout(location = 4) in vec2 a_texcoord1;
|
||||
layout(location = 5) in float a_texcoord0_w;
|
||||
layout(location = 6) in vec3 a_view;
|
||||
layout(location = 7) in vec2 a_texcoord2;
|
||||
|
||||
out vec3 v_normal;
|
||||
out vec3 v_tangent;
|
||||
out vec3 v_bitangent;
|
||||
out vec4 v_colour;
|
||||
out vec3 v_texcoord0;
|
||||
out vec2 v_texcoord1;
|
||||
out vec3 v_view;
|
||||
out vec2 v_texcoord2;
|
||||
flat out vec4 v_textureEnvColor[6];
|
||||
flat out vec4 v_textureEnvBufferColor;
|
||||
|
||||
out float gl_ClipDistance[2];
|
||||
|
||||
// TEV uniforms
|
||||
uniform uint u_textureEnvColor[6];
|
||||
uniform uint u_picaRegs[0x200 - 0x48];
|
||||
|
||||
// Helper so that the implementation of u_pica_regs can be changed later
|
||||
// u_picaRegs holds 0x200 - 0x48 entries and its first element corresponds to register address 0x48, hence the offset
uint readPicaReg(uint reg_addr) { return u_picaRegs[reg_addr - 0x48]; }
|
||||
|
||||
// Unpacks a 32-bit colour into a vec4 with each channel normalized to [0.0, 1.0].
// Red is the lowest byte, followed by green and blue, with alpha in the highest byte.
vec4 abgr8888ToVec4(uint abgr) {
	float red = float(abgr & 0xffu);
	float green = float((abgr >> 8) & 0xffu);
	float blue = float((abgr >> 16) & 0xffu);
	float alpha = float(abgr >> 24);

	const float scale = 1.0 / 255.0;
	return scale * vec4(red, green, blue, alpha);
}
|
||||
|
||||
// Rotates v by the quaternion q, where q.xyz is the vector part and q.w the scalar part
vec3 rotateVec3ByQuaternion(vec3 v, vec4 q) {
	vec3 u = q.xyz;
	float s = q.w;

	vec3 alongAxis = 2.0 * dot(u, v) * u;
	vec3 scaledInput = (s * s - dot(u, u)) * v;
	vec3 perpendicular = 2.0 * s * cross(u, v);

	return alongAxis + scaledInput + perpendicular;
}
|
||||
|
||||
// Convert an arbitrary-width floating point literal to an f32
|
||||
// Converts a float with 1 sign bit, E exponent bits and M mantissa bits (stored in the low bits of hex) to an f32.
// The exponent is re-biased for f32, an all-ones exponent maps to the f32 all-ones exponent (255),
// and a value whose exponent and mantissa bits are all zero decodes to a signed zero.
float decodeFP(uint hex, uint E, uint M) {
	uint exponentMask = (1u << E) - 1u;
	uint mantissaMask = (1u << M) - 1u;
	uint magnitudeMask = (1u << (M + E)) - 1u;  // Every bit below the sign bit

	uint signBit = (hex >> (E + M)) << 31u;

	// Exponent and mantissa both zero: only the sign survives
	if ((hex & magnitudeMask) == 0u) {
		return uintBitsToFloat(signBit);
	}

	uint exponent = (hex >> M) & exponentMask;
	uint mantissa = hex & mantissaMask;

	// Difference between the f32 exponent bias and the bias of the source format
	uint bias = 128u - (1u << (E - 1u));
	uint f32Exponent = (exponent == exponentMask) ? 255u : exponent + bias;

	return uintBitsToFloat(signBit | (mantissa << (23u - M)) | (f32Exponent << 23u));
}
|
||||
|
||||
// Vertex entry point: passes the position and vertex attributes through, derives the surface basis
// vectors from the vertex quaternion, unpacks the TEV colours and sets up the two clip distances.
void main() {
	gl_Position = a_coords;
	v_colour = a_vertexColour;
	v_view = a_view;

	// Flip y axis of UVs because OpenGL uses an inverted y for texture sampling compared to the PICA
	v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
	v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
	v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);

	// Rotate the surface-local basis vectors by the vertex quaternion
	v_tangent = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
	v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
	v_normal = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));

	// Unpack the per-stage TEV constant colours and the TEV buffer colour for the fragment shader
	for (int stage = 0; stage < 6; stage++) {
		v_textureEnvColor[stage] = abgr8888ToVec4(u_textureEnvColor[stage]);
	}
	v_textureEnvBufferColor = abgr8888ToVec4(readPicaReg(0xFD));

	// Parse clipping plane registers
	// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0
	// With n = (A, B, C) being the normal vector and D being the origin point distance
	// Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1]
	float planeA = decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16);
	float planeB = decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16);
	float planeC = decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16);
	float planeD = decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16);
	vec4 clipPlane = vec4(planeA, planeB, planeC, planeD);

	// There's also another, always-on clipping plane based on vertex z
	gl_ClipDistance[0] = -a_coords.z;
	gl_ClipDistance[1] = dot(clipPlane, a_coords);
}
|
|
@ -1,9 +1,9 @@
|
|||
#include "emulator.hpp"
|
||||
|
||||
int main (int argc, char *argv[]) {
|
||||
Emulator emu;
|
||||
int main(int argc, char *argv[]) {
|
||||
Emulator emu;
|
||||
|
||||
emu.initGraphicsContext();
|
||||
emu.initGraphicsContext();
|
||||
|
||||
if (argc > 1) {
|
||||
auto romPath = std::filesystem::current_path() / argv[1];
|
||||
|
|
35
src/renderer.cpp
Normal file
35
src/renderer.cpp
Normal file
|
@ -0,0 +1,35 @@
|
|||
#include "renderer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
|
||||
// Initializes the gpu and regs members from the given GPU and its internal register array; performs no other setup
Renderer::Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
|
||||
// Empty destructor body: nothing is released explicitly here
Renderer::~Renderer() {}
|
||||
|
||||
std::optional<RendererType> Renderer::typeFromString(std::string inString) {
|
||||
// Transform to lower-case to make the setting case-insensitive
|
||||
std::transform(inString.begin(), inString.end(), inString.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||
|
||||
// Huge table of possible names and misspellings
|
||||
// Please stop misspelling Vulkan as Vulcan
|
||||
static const std::unordered_map<std::string, RendererType> map = {
|
||||
{"null", RendererType::Null}, {"nil", RendererType::Null}, {"none", RendererType::Null},
|
||||
{"gl", RendererType::OpenGL}, {"ogl", RendererType::OpenGL}, {"opengl", RendererType::OpenGL},
|
||||
{"vk", RendererType::Vulkan}, {"vulkan", RendererType::Vulkan}, {"vulcan", RendererType::Vulkan},
|
||||
};
|
||||
|
||||
if (auto search = map.find(inString); search != map.end()) {
|
||||
return search->second;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Returns the canonical lower-case name of a renderer type, or "Invalid" for any value not listed here
const char* Renderer::typeToString(RendererType rendererType) {
	if (rendererType == RendererType::Null) {
		return "null";
	}
	if (rendererType == RendererType::OpenGL) {
		return "opengl";
	}
	if (rendererType == RendererType::Vulkan) {
		return "vulkan";
	}

	return "Invalid";
}
|
Loading…
Add table
Add a link
Reference in a new issue