Merge branch 'master' into metal2

This commit is contained in:
SamoZ256 2024-10-31 13:45:58 +01:00 committed by GitHub
commit 02f8250aff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
69 changed files with 2906 additions and 319 deletions

View file

@ -67,6 +67,7 @@ void EmulatorConfig::load() {
vsyncEnabled = toml::find_or<toml::boolean>(gpu, "EnableVSync", true);
useUbershaders = toml::find_or<toml::boolean>(gpu, "UseUbershaders", ubershaderDefault);
accurateShaderMul = toml::find_or<toml::boolean>(gpu, "AccurateShaderMultiplication", false);
accelerateShaders = toml::find_or<toml::boolean>(gpu, "AccelerateShaders", accelerateShadersDefault);
forceShadergenForLights = toml::find_or<toml::boolean>(gpu, "ForceShadergenForLighting", true);
lightShadergenThreshold = toml::find_or<toml::integer>(gpu, "ShadergenLightThreshold", 1);
@ -79,7 +80,7 @@ void EmulatorConfig::load() {
if (audioResult.is_ok()) {
auto audio = audioResult.unwrap();
auto dspCoreName = toml::find_or<std::string>(audio, "DSPEmulation", "Null");
auto dspCoreName = toml::find_or<std::string>(audio, "DSPEmulation", "HLE");
dspType = Audio::DSPCore::typeFromString(dspCoreName);
audioEnabled = toml::find_or<toml::boolean>(audio, "EnableAudio", false);
}
@ -141,6 +142,7 @@ void EmulatorConfig::save() {
data["GPU"]["UseUbershaders"] = useUbershaders;
data["GPU"]["ForceShadergenForLighting"] = forceShadergenForLights;
data["GPU"]["ShadergenLightThreshold"] = lightShadergenThreshold;
data["GPU"]["AccelerateShaders"] = accelerateShaders;
data["GPU"]["EnableRenderdoc"] = enableRenderdoc;
data["Audio"]["DSPEmulation"] = std::string(Audio::DSPCore::typeToString(dspType));

View file

@ -0,0 +1,137 @@
#include "PICA/draw_acceleration.hpp"
#include <bit>
#include <tuple>
#include "PICA/gpu.hpp"
#include "PICA/pica_simd.hpp"
#include "PICA/regs.hpp"
// Collects the information a renderer needs in order to perform vertex fetch for the current draw by itself
// (the "accelerated"/hardware shader path) and stores it in `accel`:
//   - whether the draw is indexed, and if so a pointer to the index buffer, the index width and the
//     [minimumIndex, maximumIndex] range of vertices that the draw references
//   - for every non-empty attribute loader: a pointer to its data and the number of bytes covering that range
//   - for every attribute sourced from a loader: its component count, type, stride and offset in the uploaded data
//   - for every fixed attribute whose input register is not already fed by a loader: its value as 4 floats
//
// accel:   Output structure, filled in by this function. canBeAccelerated is unconditionally set to true at the end.
// indexed: True if the draw fetches its vertices through an index buffer.
void GPU::getAcceleratedDrawInfo(PICA::DrawAcceleration& accel, bool indexed) {
    // Size in bytes of one component for each attribute type (sbyte/ubyte/short/float)
    static constexpr u32 sizePerComponent[4] = {1, 1, 2, 4};

    accel.indexed = indexed;
    accel.totalAttribCount = totalAttribCount;
    accel.enabledAttributeMask = 0;

    const u32 vertexBase = ((regs[PICA::InternalRegs::VertexAttribLoc] >> 1) & 0xfffffff) * 16;
    const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg];  // Total # of vertices to transfer

    if (indexed) {
        const u32 indexBufferConfig = regs[PICA::InternalRegs::IndexBufferConfig];
        const u32 indexBufferPointer = vertexBase + (indexBufferConfig & 0xfffffff);
        u8* indexBuffer = getPointerPhys<u8>(indexBufferPointer);

        // Bit 31 of the index buffer config indicates whether vertex indices are 16-bit (set) or 8-bit (clear)
        accel.useShortIndices = Helpers::getBit<31>(indexBufferConfig);

        // Calculate the minimum and maximum indices used in the index buffer, so that only the vertices in that range get uploaded
        if (accel.useShortIndices) {
            std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<true>(indexBuffer, vertexCount);
        } else {
            std::tie(accel.minimumIndex, accel.maximumIndex) = PICA::IndexBuffer::analyze<false>(indexBuffer, vertexCount);
        }

        accel.indexBuffer = indexBuffer;
    } else {
        // Non-indexed draws read vertexCount consecutive vertices starting from the vertex offset register
        accel.indexBuffer = nullptr;
        accel.minimumIndex = regs[PICA::InternalRegs::VertexOffsetReg];
        accel.maximumIndex = accel.minimumIndex + vertexCount - 1;
    }

    const u64 vertexCfg = u64(regs[PICA::InternalRegs::AttribFormatLow]) | (u64(regs[PICA::InternalRegs::AttribFormatHigh]) << 32);
    const u64 inputAttrCfg = getVertexShaderInputConfig();

    // Running offset of the current loader's data, counted in unaligned loader sizes
    u32 loaderOffset = 0;
    accel.vertexDataSize = 0;
    accel.totalLoaderCount = 0;

    for (int i = 0; i < PICA::DrawAcceleration::maxLoaderCount; i++) {
        auto& loaderData = attributeInfo[i];  // Get information for this attribute loader

        // This loader is empty, skip it
        if (loaderData.componentCount == 0 || loaderData.size == 0) {
            continue;
        }

        auto& loader = accel.loaders[accel.totalLoaderCount++];

        // The size of the loader in bytes is equal to the bytes supplied for 1 vertex, multiplied by the number of vertices we'll be uploading
        // Which is equal to maximumIndex - minimumIndex + 1
        const u32 bytes = loaderData.size * (accel.maximumIndex - accel.minimumIndex + 1);
        loader.size = bytes;

        // Add it to the total vertex data size, aligned to 4 bytes.
        accel.vertexDataSize += (bytes + 3) & ~3;

        // Get a pointer to where this loader's data for the first referenced vertex is stored
        const u32 loaderAddress = vertexBase + loaderData.offset + (accel.minimumIndex * loaderData.size);
        loader.data = getPointerPhys<u8>(loaderAddress);

        const u64 attrCfg = loaderData.getConfigFull();  // Get config1 | (config2 << 32)
        u32 attributeOffset = 0;

        for (int component = 0; component < loaderData.componentCount; component++) {
            const u32 attributeIndex = (attrCfg >> (component * 4)) & 0xf;  // Get index of attribute in vertexCfg

            // Vertex attributes used as padding
            // 12, 13, 14 and 15 are equivalent to 4, 8, 12 and 16 bytes of padding respectively
            if (attributeIndex >= 12) [[unlikely]] {
                // Align attribute address up to a 4 byte boundary
                attributeOffset = (attributeOffset + 3) & -4;
                attributeOffset += (attributeIndex - 11) << 2;
                continue;
            }

            const u32 attribInfo = (vertexCfg >> (attributeIndex * 4)) & 0xf;
            const u32 attribType = attribInfo & 0x3;  // Type of attribute (sbyte/ubyte/short/float)
            const u32 size = (attribInfo >> 2) + 1;   // Total number of components

            // Shader input register that this attribute is mapped to
            const u32 inputReg = (inputAttrCfg >> (attributeIndex * 4)) & 0xf;
            // Mark the attribute as enabled
            accel.enabledAttributeMask |= 1u << inputReg;

            auto& attr = accel.attributeInfo[inputReg];
            attr.componentCount = size;
            attr.offset = attributeOffset + loaderOffset;
            attr.stride = loaderData.size;
            attr.type = attribType;

            attributeOffset += size * sizePerComponent[attribType];
        }

        loaderOffset += loader.size;
    }

    u32 fixedAttributes = fixedAttribMask;
    accel.fixedAttributes = 0;

    // Fetch values for all fixed attributes, counting trailing zeros (std::countr_zero) on the fixed attribute mask
    // to visit only the attributes that are actually fixed, from the lowest set bit upwards
    while (fixedAttributes != 0) {
        // Get index of next fixed attribute and turn it off
        const u32 index = std::countr_zero<u32>(fixedAttributes);
        const u32 mask = 1u << index;
        fixedAttributes ^= mask;

        // PICA register this fixed attribute is meant to go to
        const u32 inputReg = (inputAttrCfg >> (index * 4)) & 0xf;
        const u32 inputRegMask = 1u << inputReg;

        // If this input reg is already used for a non-fixed attribute then it will not be replaced by a fixed attribute
        if ((accel.enabledAttributeMask & inputRegMask) == 0) {
            vec4f& fixedAttr = shaderUnit.vs.fixedAttributes[index];
            auto& attr = accel.attributeInfo[inputReg];
            accel.fixedAttributes |= inputRegMask;

            for (int i = 0; i < 4; i++) {
                attr.fixedValue[i] = fixedAttr[i].toFloat32();
            }
        }
    }

    accel.canBeAccelerated = true;
}

View file

@ -126,37 +126,62 @@ void GPU::reset() {
externalRegs[Framebuffer1Config] = static_cast<u32>(PICA::ColorFmt::RGB8);
externalRegs[Framebuffer1Select] = 0;
renderer->setUbershaderSetting(config.useUbershaders);
renderer->reset();
}
// Runtime -> compile-time dispatch for draws.
// Picks the drawArrays<indexed, useShaderJIT> instantiation that matches this draw:
//   1st template parameter: whether the draw is indexed
//   2nd template parameter: whether the shader JIT is both available and enabled in the config
void GPU::drawArrays(bool indexed) {
    const bool useJIT = ShaderJIT::isAvailable() && config.shaderJitEnabled;

    if (indexed && useJIT) {
        drawArrays<true, true>();
    } else if (indexed) {
        drawArrays<true, false>();
    } else if (useJIT) {
        drawArrays<false, true>();
    } else {
        drawArrays<false, false>();
    }
}
static std::array<PICA::Vertex, Renderer::vertexBufferSize> vertices;
template <bool indexed, bool useShaderJIT>
void GPU::drawArrays() {
if constexpr (useShaderJIT) {
shaderJIT.prepare(shaderUnit.vs);
// Call the correct version of drawArrays based on whether this is an indexed draw (first template parameter)
// And whether we are going to use the shader JIT (second template parameter)
void GPU::drawArrays(bool indexed) {
PICA::DrawAcceleration accel;
if (config.accelerateShaders) {
// If we are potentially going to use hw shaders, gather necessary to do vertex fetch, index buffering, etc on the GPU
// This includes parsing which vertices to upload, getting pointers to the index buffer data & vertex data, and so on
getAcceleratedDrawInfo(accel, indexed);
}
setVsOutputMask(regs[PICA::InternalRegs::VertexShaderOutputMask]);
const bool hwShaders = renderer->prepareForDraw(shaderUnit, &accel);
if (hwShaders) {
// Hardware shaders have their own accelerated code path for draws, so they skip everything here
const PICA::PrimType primType = static_cast<PICA::PrimType>(Helpers::getBits<8, 2>(regs[PICA::InternalRegs::PrimitiveConfig]));
// Total # of vertices to render
const u32 vertexCount = regs[PICA::InternalRegs::VertexCountReg];
// Note: In the hardware shader path the vertices span shouldn't actually be used as the renderer will perform its own attribute fetching
renderer->drawVertices(primType, std::span(vertices).first(vertexCount));
} else {
const bool shaderJITEnabled = ShaderJIT::isAvailable() && config.shaderJitEnabled;
if (indexed) {
if (shaderJITEnabled) {
drawArrays<true, ShaderExecMode::JIT>();
} else {
drawArrays<true, ShaderExecMode::Interpreter>();
}
} else {
if (shaderJITEnabled) {
drawArrays<false, ShaderExecMode::JIT>();
} else {
drawArrays<false, ShaderExecMode::Interpreter>();
}
}
}
}
template <bool indexed, ShaderExecMode mode>
void GPU::drawArrays() {
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.prepare(shaderUnit.vs);
} else if constexpr (mode == ShaderExecMode::Hardware) {
// Hardware shaders have their own accelerated code path for draws, so they're not meant to take this path
Helpers::panic("GPU::DrawArrays: Hardware shaders shouldn't take this path!");
}
// We can have up to 16 attributes, each one consisting of 4 floats
constexpr u32 maxAttrSizeInFloats = 16 * 4;
// Base address for vertex attributes
// The vertex base is always on a quadword boundary because the PICA does weird alignment shit any time possible
@ -321,8 +346,6 @@ void GPU::drawArrays() {
}
// Fill the remaining attribute lanes with default parameters (1.0 for alpha/w, 0.0) for everything else
// Corgi does this although I'm not sure if it's actually needed for anything.
// TODO: Find out
while (component < 4) {
attribute[component] = (component == 3) ? f24::fromFloat32(1.0) : f24::fromFloat32(0.0);
component++;
@ -336,13 +359,13 @@ void GPU::drawArrays() {
// Before running the shader, the PICA maps the fetched attributes from the attribute registers to the shader input registers
// Based on the SH_ATTRIBUTES_PERMUTATION registers.
// Ie it might attribute #0 to v2, #1 to v7, etc
// Ie it might map attribute #0 to v2, #1 to v7, etc
for (int j = 0; j < totalAttribCount; j++) {
const u32 mapping = (inputAttrCfg >> (j * 4)) & 0xf;
std::memcpy(&shaderUnit.vs.inputs[mapping], &currentAttributes[j], sizeof(vec4f));
}
if constexpr (useShaderJIT) {
if constexpr (mode == ShaderExecMode::JIT) {
shaderJIT.run(shaderUnit.vs);
} else {
shaderUnit.vs.run();

View file

@ -249,6 +249,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
// If we've reached 3 verts, issue a draw call
// Handle rendering depending on the primitive type
if (immediateModeVertIndex == 3) {
renderer->prepareForDraw(shaderUnit, nullptr);
renderer->drawVertices(PICA::PrimType::TriangleList, immediateModeVertices);
switch (primType) {
@ -300,7 +301,7 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
}
case VertexBoolUniform: {
shaderUnit.vs.boolUniform = value & 0xffff;
shaderUnit.vs.uploadBoolUniform(value & 0xffff);
break;
}

View file

@ -1,5 +1,10 @@
#include "PICA/shader_decompiler.hpp"
#include <fmt/format.h>
#include <array>
#include <cassert>
#include "config.hpp"
using namespace PICA;
@ -13,11 +18,45 @@ void ControlFlow::analyze(const PICAShader& shader, u32 entrypoint) {
analysisFailed = false;
const Function* function = addFunction(shader, entrypoint, PICAShader::maxInstructionCount);
if (function == nullptr) {
if (function == nullptr || function->exitMode != ExitMode::AlwaysEnd) {
analysisFailed = true;
}
}
// Helpers for merging parallel/series exit methods from Citra
// Combines the exit modes of two branches of which exactly one gets executed (eg the two sides of a jump).
// An Unknown side defers to the other side, two identical sides keep their mode, and anything else is Conditional.
static ExitMode exitParallel(ExitMode a, ExitMode b) {
    if (a == ExitMode::Unknown) {
        return b;
    }

    // Either b carries no information, or both sides agree: in both cases the result is a
    if (b == ExitMode::Unknown || a == b) {
        return a;
    }

    return ExitMode::Conditional;
}
// Combines the exit modes of two pieces of code that run one after the other: a first, then b.
// Callers must not pass AlwaysEnd for a (asserted), since nothing after a would be reached in that case.
static ExitMode exitSeries(ExitMode a, ExitMode b) {
    assert(a != ExitMode::AlwaysEnd);

    switch (a) {
        // Nothing is known about the first part, so nothing is known about the sequence either
        case ExitMode::Unknown: return ExitMode::Unknown;
        // The first part always falls through to the second, so the second decides
        case ExitMode::AlwaysReturn: return b;
        default: break;
    }

    const bool secondPartEnds = (b == ExitMode::Unknown || b == ExitMode::AlwaysEnd);
    return secondPartEnds ? ExitMode::AlwaysEnd : ExitMode::Conditional;
}
ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 end, Function::Labels& labels) {
// Initialize exit mode to unknown by default, in order to detect things like unending loops
auto [it, inserted] = exitMap.emplace(AddressRange(start, end), ExitMode::Unknown);
@ -32,25 +71,132 @@ ExitMode ControlFlow::analyzeFunction(const PICAShader& shader, u32 start, u32 e
const u32 opcode = instruction >> 26;
switch (opcode) {
case ShaderOpcodes::JMPC: Helpers::panic("Unimplemented control flow operation (JMPC)");
case ShaderOpcodes::JMPU: Helpers::panic("Unimplemented control flow operation (JMPU)");
case ShaderOpcodes::IFU: Helpers::panic("Unimplemented control flow operation (IFU)");
case ShaderOpcodes::IFC: Helpers::panic("Unimplemented control flow operation (IFC)");
case ShaderOpcodes::CALL: Helpers::panic("Unimplemented control flow operation (CALL)");
case ShaderOpcodes::CALLC: Helpers::panic("Unimplemented control flow operation (CALLC)");
case ShaderOpcodes::CALLU: Helpers::panic("Unimplemented control flow operation (CALLU)");
case ShaderOpcodes::LOOP: Helpers::panic("Unimplemented control flow operation (LOOP)");
case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second;
case ShaderOpcodes::JMPC:
case ShaderOpcodes::JMPU: {
const u32 dest = getBits<10, 12>(instruction);
// Register this jump address to our outLabels set
labels.insert(dest);
// This opens up 2 parallel paths of execution
auto branchTakenExit = analyzeFunction(shader, dest, end, labels);
auto branchNotTakenExit = analyzeFunction(shader, pc + 1, end, labels);
it->second = exitParallel(branchTakenExit, branchNotTakenExit);
return it->second;
}
case ShaderOpcodes::IFU:
case ShaderOpcodes::IFC: {
const u32 num = instruction & 0xff;
const u32 dest = getBits<10, 12>(instruction);
const Function* branchTakenFunc = addFunction(shader, pc + 1, dest);
// Check if analysis of the branch taken func failed and return unknown if it did
if (analysisFailed) {
it->second = ExitMode::Unknown;
return it->second;
}
// Next analyze the not taken func
ExitMode branchNotTakenExitMode = ExitMode::AlwaysReturn;
if (num != 0) {
const Function* branchNotTakenFunc = addFunction(shader, dest, dest + num);
// Check if analysis failed and return unknown if it did
if (analysisFailed) {
it->second = ExitMode::Unknown;
return it->second;
}
branchNotTakenExitMode = branchNotTakenFunc->exitMode;
}
auto parallel = exitParallel(branchTakenFunc->exitMode, branchNotTakenExitMode);
// Both branches of the if/else end, so there's nothing after the call
if (parallel == ExitMode::AlwaysEnd) {
it->second = parallel;
return it->second;
} else {
ExitMode afterConditional = analyzeFunction(shader, dest + num, end, labels);
ExitMode conditionalExitMode = exitSeries(parallel, afterConditional);
it->second = conditionalExitMode;
return it->second;
}
break;
}
case ShaderOpcodes::CALL: {
const u32 num = instruction & 0xff;
const u32 dest = getBits<10, 12>(instruction);
const Function* calledFunction = addFunction(shader, dest, dest + num);
// Check if analysis of the called function failed and return unknown if it did
if (analysisFailed) {
it->second = ExitMode::Unknown;
return it->second;
}
if (calledFunction->exitMode == ExitMode::AlwaysEnd) {
it->second = ExitMode::AlwaysEnd;
return it->second;
}
// Exit mode of the remainder of this function, after we return from the callee
const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels);
const ExitMode exitMode = exitSeries(calledFunction->exitMode, postCallExitMode);
it->second = exitMode;
return exitMode;
}
case ShaderOpcodes::CALLC:
case ShaderOpcodes::CALLU: {
const u32 num = instruction & 0xff;
const u32 dest = getBits<10, 12>(instruction);
const Function* calledFunction = addFunction(shader, dest, dest + num);
// Check if analysis of the called function failed and return unknown if it did
if (analysisFailed) {
it->second = ExitMode::Unknown;
return it->second;
}
// Exit mode of the remainder of this function, after we return from the callee
const ExitMode postCallExitMode = analyzeFunction(shader, pc + 1, end, labels);
const ExitMode exitMode = exitSeries(exitParallel(calledFunction->exitMode, ExitMode::AlwaysReturn), postCallExitMode);
it->second = exitMode;
return exitMode;
}
case ShaderOpcodes::LOOP: {
u32 dest = getBits<10, 12>(instruction);
const Function* loopFunction = addFunction(shader, pc + 1, dest + 1);
if (analysisFailed) {
it->second = ExitMode::Unknown;
return it->second;
}
if (loopFunction->exitMode == ExitMode::AlwaysEnd) {
it->second = ExitMode::AlwaysEnd;
return it->second;
}
const ExitMode afterLoop = analyzeFunction(shader, dest + 1, end, labels);
const ExitMode exitMode = exitSeries(loopFunction->exitMode, afterLoop);
it->second = exitMode;
return it->second;
}
case ShaderOpcodes::END: it->second = ExitMode::AlwaysEnd; return it->second;
default: break;
}
}
// A function without control flow instructions will always reach its "return point" and return
return ExitMode::AlwaysReturn;
it->second = ExitMode::AlwaysReturn;
return it->second;
}
void ShaderDecompiler::compileRange(const AddressRange& range) {
std::pair<u32, bool> ShaderDecompiler::compileRange(const AddressRange& range) {
u32 pc = range.start;
const u32 end = range.end >= range.start ? range.end : PICAShader::maxInstructionCount;
bool finished = false;
@ -58,6 +204,8 @@ void ShaderDecompiler::compileRange(const AddressRange& range) {
while (pc < end && !finished) {
compileInstruction(pc, finished);
}
return std::make_pair(pc, finished);
}
const Function* ShaderDecompiler::findFunction(const AddressRange& range) {
@ -71,20 +219,43 @@ const Function* ShaderDecompiler::findFunction(const AddressRange& range) {
}
void ShaderDecompiler::writeAttributes() {
// Annoyingly, GLES does not support having an array as an input attribute, so declare each attribute separately for now
decompiledShader += R"(
layout(location = 0) in vec4 inputs[8];
layout(location = 0) in vec4 attr0;
layout(location = 1) in vec4 attr1;
layout(location = 2) in vec4 attr2;
layout(location = 3) in vec4 attr3;
layout(location = 4) in vec4 attr4;
layout(location = 5) in vec4 attr5;
layout(location = 6) in vec4 attr6;
layout(location = 7) in vec4 attr7;
layout(location = 8) in vec4 attr8;
layout(location = 9) in vec4 attr9;
layout(location = 10) in vec4 attr10;
layout(location = 11) in vec4 attr11;
layout(location = 12) in vec4 attr12;
layout(location = 13) in vec4 attr13;
layout(location = 14) in vec4 attr14;
layout(location = 15) in vec4 attr15;
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_float[96];
uvec4 uniform_int;
uint uniform_bool;
};
vec4 temp_registers[16];
vec4 dummy_vec = vec4(0.0);
layout(std140) uniform PICAShaderUniforms {
vec4 uniform_f[96];
uvec4 uniform_i;
uint uniform_bool;
};
vec4 temp[16];
vec4 out_regs[16];
vec4 dummy_vec = vec4(0.0);
ivec3 addr_reg = ivec3(0);
bvec2 cmp_reg = bvec2(false);
vec4 uniform_indexed(int source, int offset) {
int clipped_offs = (offset >= -128 && offset <= 127) ? offset : 0;
uint index = uint(clipped_offs + source) & 127u;
return (index < 96u) ? uniform_f[index] : vec4(1.0);
}
)";
decompiledShader += "\n";
}
std::string ShaderDecompiler::decompile() {
@ -94,11 +265,14 @@ std::string ShaderDecompiler::decompile() {
return "";
}
decompiledShader = "";
compilationError = false;
decompiledShader.clear();
// Reserve some memory for the shader string to avoid memory allocations
decompiledShader.reserve(256 * 1024);
switch (api) {
case API::GL: decompiledShader += "#version 410 core\n"; break;
case API::GLES: decompiledShader += "#version 300 es\n"; break;
case API::GLES: decompiledShader += "#version 300 es\nprecision mediump float;\nprecision mediump int;\n"; break;
default: break;
}
@ -109,7 +283,7 @@ std::string ShaderDecompiler::decompile() {
decompiledShader += R"(
vec4 safe_mul(vec4 a, vec4 b) {
vec4 res = a * b;
return mix(res, mix(mix(vec4(0.0), res, isnan(rhs)), product, isnan(lhs)), isnan(res));
return mix(res, mix(mix(vec4(0.0), res, isnan(b)), res, isnan(a)), isnan(res));
}
)";
}
@ -121,17 +295,61 @@ std::string ShaderDecompiler::decompile() {
decompiledShader += "void pica_shader_main() {\n";
AddressRange mainFunctionRange(entrypoint, PICAShader::maxInstructionCount);
callFunction(*findFunction(mainFunctionRange));
decompiledShader += "}\n";
auto mainFunc = findFunction(mainFunctionRange);
for (auto& func : controlFlow.functions) {
if (func.outLabels.size() > 0) {
Helpers::panic("Function with out labels");
decompiledShader += mainFunc->getCallStatement() + ";\n}\n";
for (const Function& func : controlFlow.functions) {
if (func.outLabels.empty()) {
decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier());
auto [pc, finished] = compileRange(AddressRange(func.start, func.end));
if (!finished) {
decompiledShader += "return false;";
}
decompiledShader += "}\n";
} else {
auto labels = func.outLabels;
labels.insert(func.start);
// If a function has jumps and "labels", this needs to be emulated using a switch-case, with the variable being switched on being the
// current PC
decompiledShader += fmt::format("bool {}() {{\n", func.getIdentifier());
decompiledShader += fmt::format("uint pc = {}u;\n", func.start);
decompiledShader += "while(true){\nswitch(pc){\n";
for (u32 label : labels) {
decompiledShader += fmt::format("case {}u: {{", label);
// Fetch the next label whose address > label
auto it = labels.lower_bound(label + 1);
u32 next = (it == labels.end()) ? func.end : *it;
auto [endPC, finished] = compileRange(AddressRange(label, next));
if (endPC > next && !finished) {
labels.insert(endPC);
decompiledShader += fmt::format("pc = {}u; break;", endPC);
}
// Fallthrough to next label
decompiledShader += "}\n";
}
decompiledShader += "default: return false;\n";
// Exit the switch and loop
decompiledShader += "} }\n";
// Exit the function
decompiledShader += "return false;\n";
decompiledShader += "}\n";
}
}
decompiledShader += "void " + func.getIdentifier() + "() {\n";
compileRange(AddressRange(func.start, func.end));
decompiledShader += "}\n";
// We allow some leeway for "compilation errors" in addition to control flow errors, in cases where eg an unimplemented instruction
// or an instruction that we can't emulate in GLSL is found in the instruction stream. Just like control flow errors, these return an empty string
// and the renderer core will decide to use CPU shaders instead
if (compilationError) [[unlikely]] {
return "";
}
return decompiledShader;
@ -139,30 +357,41 @@ std::string ShaderDecompiler::decompile() {
std::string ShaderDecompiler::getSource(u32 source, [[maybe_unused]] u32 index) const {
if (source < 0x10) {
return "inputs[" + std::to_string(source) + "]";
return "attr" + std::to_string(source);
} else if (source < 0x20) {
return "temp_registers[" + std::to_string(source - 0x10) + "]";
return "temp[" + std::to_string(source - 0x10) + "]";
} else {
const usize floatIndex = (source - 0x20) & 0x7f;
if (floatIndex >= 96) [[unlikely]] {
return "dummy_vec";
if (index == 0) {
if (floatIndex >= 96) [[unlikely]] {
return "dummy_vec";
}
return "uniform_f[" + std::to_string(floatIndex) + "]";
} else {
static constexpr std::array<const char*, 4> offsets = {"0", "addr_reg.x", "addr_reg.y", "addr_reg.z"};
return fmt::format("uniform_indexed({}, {})", floatIndex, offsets[index]);
}
return "uniform_float[" + std::to_string(floatIndex) + "]";
}
}
std::string ShaderDecompiler::getDest(u32 dest) const {
if (dest < 0x10) {
return "output_registers[" + std::to_string(dest) + "]";
return "out_regs[" + std::to_string(dest) + "]";
} else if (dest < 0x20) {
return "temp_registers[" + std::to_string(dest - 0x10) + "]";
return "temp[" + std::to_string(dest - 0x10) + "]";
} else {
return "dummy_vec";
}
}
std::string ShaderDecompiler::getSwizzlePattern(u32 swizzle) const {
// If the swizzle field is this value then the swizzle pattern is .xyzw so we don't need a shuffle
static constexpr uint noSwizzle = 0x1B;
if (swizzle == noSwizzle) {
return "";
}
static constexpr std::array<char, 4> names = {'x', 'y', 'z', 'w'};
std::string ret(". ");
@ -176,7 +405,6 @@ std::string ShaderDecompiler::getSwizzlePattern(u32 swizzle) const {
std::string ShaderDecompiler::getDestSwizzle(u32 destinationMask) const {
std::string ret = ".";
if (destinationMask & 0b1000) {
ret += "x";
}
@ -208,11 +436,12 @@ void ShaderDecompiler::setDest(u32 operandDescriptor, const std::string& dest, c
return;
}
decompiledShader += dest + destSwizzle + " = ";
if (writtenLaneCount == 1) {
decompiledShader += "float(" + value + ");\n";
} else {
decompiledShader += "vec" + std::to_string(writtenLaneCount) + "(" + value + ");\n";
// Don't write destination swizzle if all lanes are getting written to
decompiledShader += fmt::format("{}{} = ", dest, writtenLaneCount == 4 ? "" : destSwizzle);
if (writtenLaneCount <= 3) {
decompiledShader += fmt::format("({}){};\n", value, destSwizzle);
} else if (writtenLaneCount == 4) {
decompiledShader += fmt::format("{};\n", value);
}
}
@ -246,26 +475,101 @@ void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) {
std::string dest = getDest(destIndex);
if (idx != 0) {
Helpers::panic("GLSL recompiler: Indexed instruction");
}
if (invertSources) {
Helpers::panic("GLSL recompiler: Inverted instruction");
}
switch (opcode) {
case ShaderOpcodes::MOV: setDest(operandDescriptor, dest, src1); break;
case ShaderOpcodes::ADD: setDest(operandDescriptor, dest, src1 + " + " + src2); break;
case ShaderOpcodes::MUL: setDest(operandDescriptor, dest, src1 + " * " + src2); break;
case ShaderOpcodes::MAX: setDest(operandDescriptor, dest, "max(" + src1 + ", " + src2 + ")"); break;
case ShaderOpcodes::MIN: setDest(operandDescriptor, dest, "min(" + src1 + ", " + src2 + ")"); break;
case ShaderOpcodes::ADD: setDest(operandDescriptor, dest, fmt::format("{} + {}", src1, src2)); break;
case ShaderOpcodes::MUL:
if (!config.accurateShaderMul) {
setDest(operandDescriptor, dest, fmt::format("{} * {}", src1, src2));
} else {
setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {})", src1, src2));
}
break;
case ShaderOpcodes::MAX: setDest(operandDescriptor, dest, fmt::format("max({}, {})", src1, src2)); break;
case ShaderOpcodes::MIN: setDest(operandDescriptor, dest, fmt::format("min({}, {})", src1, src2)); break;
case ShaderOpcodes::DP3: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ".xyz, " + src2 + ".xyz))"); break;
case ShaderOpcodes::DP4: setDest(operandDescriptor, dest, "vec4(dot(" + src1 + ", " + src2 + "))"); break;
case ShaderOpcodes::RSQ: setDest(operandDescriptor, dest, "vec4(inversesqrt(" + src1 + ".x))"); break;
case ShaderOpcodes::DP3:
if (!config.accurateShaderMul) {
setDest(operandDescriptor, dest, fmt::format("vec4(dot({}.xyz, {}.xyz))", src1, src2));
} else {
// A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec3(1.0)
setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}).xyz, vec3(1.0)))", src1, src2));
}
break;
case ShaderOpcodes::DP4:
if (!config.accurateShaderMul) {
setDest(operandDescriptor, dest, fmt::format("vec4(dot({}, {}))", src1, src2));
} else {
// A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0)
setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul({}, {}), vec4(1.0)))", src1, src2));
}
break;
case ShaderOpcodes::FLR: setDest(operandDescriptor, dest, fmt::format("floor({})", src1)); break;
case ShaderOpcodes::RSQ: setDest(operandDescriptor, dest, fmt::format("vec4(inversesqrt({}.x))", src1)); break;
case ShaderOpcodes::RCP: setDest(operandDescriptor, dest, fmt::format("vec4(1.0 / {}.x)", src1)); break;
case ShaderOpcodes::LG2: setDest(operandDescriptor, dest, fmt::format("vec4(log2({}.x))", src1)); break;
case ShaderOpcodes::EX2: setDest(operandDescriptor, dest, fmt::format("vec4(exp2({}.x))", src1)); break;
default: Helpers::panic("GLSL recompiler: Unknown common opcode: %X", opcode); break;
case ShaderOpcodes::SLT:
case ShaderOpcodes::SLTI: setDest(operandDescriptor, dest, fmt::format("vec4(lessThan({}, {}))", src1, src2)); break;
case ShaderOpcodes::SGE:
case ShaderOpcodes::SGEI: setDest(operandDescriptor, dest, fmt::format("vec4(greaterThanEqual({}, {}))", src1, src2)); break;
case ShaderOpcodes::DPH:
case ShaderOpcodes::DPHI:
if (!config.accurateShaderMul) {
setDest(operandDescriptor, dest, fmt::format("vec4(dot(vec4({}.xyz, 1.0), {}))", src1, src2));
} else {
// A dot product between a and b is equivalent to the per-lane multiplication of a and b followed by a dot product with vec4(1.0)
setDest(operandDescriptor, dest, fmt::format("vec4(dot(safe_mul(vec4({}.xyz, 1.0), {}), vec4(1.0)))", src1, src2));
}
break;
case ShaderOpcodes::CMP1:
case ShaderOpcodes::CMP2: {
static constexpr std::array<const char*, 8> operators = {
// The last 2 operators always return true and are handled specially
"==", "!=", "<", "<=", ">", ">=", "", "",
};
const u32 cmpY = getBits<21, 3>(instruction);
const u32 cmpX = getBits<24, 3>(instruction);
// Compare x first
if (cmpX >= 6) {
decompiledShader += "cmp_reg.x = true;\n";
} else {
decompiledShader += fmt::format("cmp_reg.x = {}.x {} {}.x;\n", src1, operators[cmpX], src2);
}
// Then compare Y
if (cmpY >= 6) {
decompiledShader += "cmp_reg.y = true;\n";
} else {
decompiledShader += fmt::format("cmp_reg.y = {}.y {} {}.y;\n", src1, operators[cmpY], src2);
}
break;
}
case ShaderOpcodes::MOVA: {
const bool writeX = getBit<3>(operandDescriptor); // Should we write the x component of the address register?
const bool writeY = getBit<2>(operandDescriptor);
if (writeX && writeY) {
decompiledShader += fmt::format("addr_reg.xy = ivec2({}.xy);\n", src1);
} else if (writeX) {
decompiledShader += fmt::format("addr_reg.x = int({}.x);\n", src1);
} else if (writeY) {
decompiledShader += fmt::format("addr_reg.y = int({}.y);\n", src1);
}
break;
}
default:
Helpers::warn("GLSL recompiler: Unknown common opcode: %02X. Falling back to CPU shaders", opcode);
compilationError = true;
break;
}
} else if (opcode >= 0x30 && opcode <= 0x3F) { // MAD and MADI
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x1f];
@ -299,23 +603,156 @@ void ShaderDecompiler::compileInstruction(u32& pc, bool& finished) {
src3 += getSwizzlePattern(swizzle3);
std::string dest = getDest(destIndex);
if (idx != 0) {
Helpers::panic("GLSL recompiler: Indexed instruction");
if (!config.accurateShaderMul) {
setDest(operandDescriptor, dest, fmt::format("{} * {} + {}", src1, src2, src3));
} else {
setDest(operandDescriptor, dest, fmt::format("safe_mul({}, {}) + {}", src1, src2, src3));
}
setDest(operandDescriptor, dest, src1 + " * " + src2 + " + " + src3);
} else {
switch (opcode) {
case ShaderOpcodes::END: finished = true; return;
default: Helpers::panic("GLSL recompiler: Unknown opcode: %X", opcode); break;
case ShaderOpcodes::JMPC: {
const u32 dest = getBits<10, 12>(instruction);
const u32 condOp = getBits<22, 2>(instruction);
const uint refY = getBit<24>(instruction);
const uint refX = getBit<25>(instruction);
const char* condition = getCondition(condOp, refX, refY);
decompiledShader += fmt::format("if ({}) {{ pc = {}u; break; }}\n", condition, dest);
break;
}
case ShaderOpcodes::JMPU: {
const u32 dest = getBits<10, 12>(instruction);
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
const u32 mask = 1u << bit;
const u32 test = (instruction & 1) ^ 1; // If the LSB is 0 we jump if bit = 1, otherwise 0
decompiledShader += fmt::format("if ((uniform_bool & {}u) {} 0u) {{ pc = {}u; break; }}\n", mask, (test != 0) ? "!=" : "==", dest);
break;
}
case ShaderOpcodes::IFU:
case ShaderOpcodes::IFC: {
const u32 num = instruction & 0xff;
const u32 dest = getBits<10, 12>(instruction);
const Function* conditionalFunc = findFunction(AddressRange(pc + 1, dest));
if (opcode == ShaderOpcodes::IFC) {
const u32 condOp = getBits<22, 2>(instruction);
const uint refY = getBit<24>(instruction);
const uint refX = getBit<25>(instruction);
const char* condition = getCondition(condOp, refX, refY);
decompiledShader += fmt::format("if ({}) {{", condition);
} else {
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
const u32 mask = 1u << bit;
decompiledShader += fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask);
}
callFunction(*conditionalFunc);
decompiledShader += "}\n";
pc = dest;
if (num > 0) {
const Function* elseFunc = findFunction(AddressRange(dest, dest + num));
pc = dest + num;
decompiledShader += "else { ";
callFunction(*elseFunc);
decompiledShader += "}\n";
if (conditionalFunc->exitMode == ExitMode::AlwaysEnd && elseFunc->exitMode == ExitMode::AlwaysEnd) {
finished = true;
return;
}
}
return;
}
case ShaderOpcodes::CALL:
case ShaderOpcodes::CALLC:
case ShaderOpcodes::CALLU: {
const u32 num = instruction & 0xff;
const u32 dest = getBits<10, 12>(instruction);
const Function* calledFunc = findFunction(AddressRange(dest, dest + num));
// Handle conditions for CALLC/CALLU
if (opcode == ShaderOpcodes::CALLC) {
const u32 condOp = getBits<22, 2>(instruction);
const uint refY = getBit<24>(instruction);
const uint refX = getBit<25>(instruction);
const char* condition = getCondition(condOp, refX, refY);
decompiledShader += fmt::format("if ({}) {{", condition);
} else if (opcode == ShaderOpcodes::CALLU) {
const u32 bit = getBits<22, 4>(instruction); // Bit of the bool uniform to check
const u32 mask = 1u << bit;
decompiledShader += fmt::format("if ((uniform_bool & {}u) != 0u) {{", mask);
}
callFunction(*calledFunc);
// Close brackets for CALLC/CALLU
if (opcode != ShaderOpcodes::CALL) {
decompiledShader += "}";
}
if (opcode == ShaderOpcodes::CALL && calledFunc->exitMode == ExitMode::AlwaysEnd) {
finished = true;
return;
}
break;
}
case ShaderOpcodes::LOOP: {
const u32 dest = getBits<10, 12>(instruction);
const u32 uniformIndex = getBits<22, 2>(instruction);
// loop counter = uniform.y
decompiledShader += fmt::format("addr_reg.z = int((uniform_i[{}] >> 8u) & 0xFFu);\n", uniformIndex);
decompiledShader += fmt::format(
"for (uint loopCtr{} = 0u; loopCtr{} <= (uniform_i[{}] & 0xFFu); loopCtr{}++, addr_reg.z += int((uniform_i[{}] >> "
"16u) & 0xFFu)) {{\n",
pc, pc, uniformIndex, pc, uniformIndex
);
AddressRange range(pc + 1, dest + 1);
const Function* func = findFunction(range);
callFunction(*func);
decompiledShader += "}\n";
// Jump to the end of the loop. We don't want to compile the code inside the loop again.
// This will be incremented by 1 due to the pc++ at the end of this loop.
pc = dest;
if (func->exitMode == ExitMode::AlwaysEnd) {
finished = true;
return;
}
break;
}
case ShaderOpcodes::END:
decompiledShader += "return true;\n";
finished = true;
return;
case ShaderOpcodes::NOP: break;
default:
Helpers::warn("GLSL recompiler: Unknown opcode: %02X. Falling back to CPU shaders", opcode);
compilationError = true;
break;
}
}
pc++;
}
bool ShaderDecompiler::usesCommonEncoding(u32 instruction) const {
const u32 opcode = instruction >> 26;
switch (opcode) {
@ -339,16 +776,57 @@ bool ShaderDecompiler::usesCommonEncoding(u32 instruction) const {
case ShaderOpcodes::SLT:
case ShaderOpcodes::SLTI:
case ShaderOpcodes::SGE:
case ShaderOpcodes::SGEI: return true;
case ShaderOpcodes::SGEI:
case ShaderOpcodes::LITP: return true;
default: return false;
}
}
void ShaderDecompiler::callFunction(const Function& function) { decompiledShader += function.getCallStatement() + ";\n"; }
// Emits GLSL that invokes a decompiled PICA function, choosing the call form from the function's exit mode.
void ShaderDecompiler::callFunction(const Function& function) {
    const auto callStatement = function.getCallStatement();

    if (function.exitMode == ExitMode::AlwaysEnd) {
        // The callee always terminates the shader: call it, then unconditionally signal the end ourselves
        decompiledShader += callStatement + ";\nreturn true;\n";
    } else if (function.exitMode == ExitMode::Conditional) {
        // The callee may or may not terminate the shader: propagate the "ended" result if it reports one
        decompiledShader += fmt::format("if ({}) {{ return true; }}\n", callStatement);
    } else {
        // The callee never terminates the shader, so a plain call statement is enough
        decompiledShader += callStatement + ";\n";
    }
}
// Convenience entry point: builds a one-shot ShaderDecompiler from the given arguments and returns what its decompile() produces.
std::string ShaderGen::decompileShader(PICAShader& shader, EmulatorConfig& config, u32 entrypoint, API api, Language language) {
    return ShaderDecompiler(shader, config, entrypoint, api, language).decompile();
}
// Returns the GLSL boolean expression for a conditional instruction.
// The lookup index is built as: bits 0-1 = cond, bit 2 = refX, bit 3 = refY.
// Within each group of 4 entries the order follows cond = 0, 1, 2, 3 (expressions combining both
// components with ||, with &&, testing only x, testing only y).
const char* ShaderDecompiler::getCondition(u32 cond, u32 refX, u32 refY) {
    static constexpr std::array<const char*, 16> conditionExpressions = {
        // refY = 0, refX = 0
        "!all(cmp_reg)",
        "all(not(cmp_reg))",
        "!cmp_reg.x",
        "!cmp_reg.y",

        // refY = 0, refX = 1
        "cmp_reg.x || !cmp_reg.y",
        "cmp_reg.x && !cmp_reg.y",
        "cmp_reg.x",
        "!cmp_reg.y",

        // refY = 1, refX = 0
        "!cmp_reg.x || cmp_reg.y",
        "!cmp_reg.x && cmp_reg.y",
        "!cmp_reg.x",
        "cmp_reg.y",

        // refY = 1, refX = 1
        "any(cmp_reg)",
        "all(cmp_reg)",
        "cmp_reg.x",
        "cmp_reg.y",
    };

    const u32 operation = cond & 0b11;
    const u32 tableIndex = (refY << 3) | (refX << 2) | operation;
    return conditionExpressions[tableIndex];
}

View file

@ -1,6 +1,14 @@
#include <fmt/format.h>
#include <utility>
#include "PICA/pica_frag_config.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_gen.hpp"
// We can include the driver headers here since they shouldn't have any actual API-specific code
#include "renderer_gl/gl_driver.hpp"
using namespace PICA;
using namespace PICA::ShaderGen;
@ -34,6 +42,8 @@ static constexpr const char* uniformDefinition = R"(
std::string FragmentGenerator::getDefaultVertexShader() {
std::string ret = "";
// Reserve some space (128KB) in the output string to avoid too many allocations later
ret.reserve(128 * 1024);
switch (api) {
case API::GL: ret += "#version 410 core"; break;
@ -94,7 +104,7 @@ std::string FragmentGenerator::getDefaultVertexShader() {
return ret;
}
std::string FragmentGenerator::generate(const FragmentConfig& config) {
std::string FragmentGenerator::generate(const FragmentConfig& config, void* driverInfo) {
std::string ret = "";
switch (api) {
@ -103,6 +113,27 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) {
default: break;
}
// For GLES we need to enable & use the framebuffer fetch extension in order to emulate logic ops
bool emitLogicOps = api == API::GLES && config.outConfig.logicOpMode != PICA::LogicOpMode::Copy && driverInfo != nullptr;
if (emitLogicOps) {
auto driver = static_cast<OpenGL::Driver*>(driverInfo);
// If the driver does not support framebuffer fetch at all, don't emit logic op code
if (!driver->supportFbFetch()) {
emitLogicOps = false;
}
// Figure out which fb fetch extension we have and enable it
else {
if (driver->supportsExtFbFetch) {
ret += "\n#extension GL_EXT_shader_framebuffer_fetch : enable\n#define fb_color fragColor\n";
} else if (driver->supportsArmFbFetch) {
ret += "\n#extension GL_ARM_shader_framebuffer_fetch : enable\n#define fb_color gl_LastFragColorARM[0]\n";
}
}
}
bool unimplementedFlag = false;
if (api == API::GLES) {
ret += R"(
@ -192,10 +223,13 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) {
}
compileFog(ret, config);
applyAlphaTest(ret, config);
ret += "fragColor = combinerOutput;\n}"; // End of main function
if (!emitLogicOps) {
ret += "fragColor = combinerOutput;\n}"; // End of main function
} else {
compileLogicOps(ret, config);
}
return ret;
}
@ -671,3 +705,135 @@ void FragmentGenerator::compileFog(std::string& shader, const PICA::FragmentConf
shader += "float fog_factor = clamp(value.r + value.g * delta, 0.0, 1.0);";
shader += "combinerOutput.rgb = mix(fog_color, combinerOutput.rgb, fog_factor);";
}
// Wraps a decompiled PICA vertex shader (picaSource) into a complete GLSL vertex shader.
// The generated main() calls pica_shader_main(), then reads the fixed-function semantics out of out_regs
// (using the output mask and output maps in vertConfig) and forwards them to the fragment stage.
// The ubershader path is not implemented: it panics and hands back picaSource unchanged.
std::string FragmentGenerator::getVertexShaderAccelerated(const std::string& picaSource, const PICA::VertConfig& vertConfig, bool usingUbershader) {
    // For each of the 32 fixed-function semantic slots (8 variables x 4 lanes):
    // first = output register that feeds it, second = lane of that register
    std::array<std::pair<int, int>, 32> semanticSources{};
    // Output register indices after applying the output mask (enabled registers first, in ascending order)
    std::array<u8, 16> enabledOutputRegs;

    {
        uint enabledCount = 0;
        const u16 enableMask = vertConfig.outputMask;

        // Collect the registers whose mask bit is set, skipping disabled ones
        for (int reg = 0; reg < 16; reg++) {
            if ((enableMask >> reg) & 1) {
                enabledOutputRegs[enabledCount++] = reg;
            }
        }

        // Remaining entries map straight to the register with the same index (TODO: What does hw actually do?)
        while (enabledCount < 16) {
            enabledOutputRegs[enabledCount] = enabledCount;
            enabledCount++;
        }

        // Every output map word holds four 8-bit fields, one per lane; the low 5 bits of each select the semantic
        for (int slot = 0; slot < vertConfig.outputCount; slot++) {
            const u32 outmap = vertConfig.outmaps[slot];

            for (int lane = 0; lane < 4; lane++) {
                const u32 semantic = (outmap >> (lane * 8)) & 0x1F;
                semanticSources[semantic] = std::make_pair(enabledOutputRegs[slot], lane);
            }
        }
    }

    // GLSL expression reading the register lane that feeds the given semantic
    const auto laneExpr = [&semanticSources](u32 semantic) {
        const auto& [reg, lane] = semanticSources[semantic];
        return fmt::format("out_regs[{}][{}]", reg, lane);
    };

    const std::string semantics = fmt::format(
        R"(
vec4 a_coords = vec4({}, {}, {}, {});
vec4 a_quaternion = vec4({}, {}, {}, {});
vec4 a_vertexColour = vec4({}, {}, {}, {});
vec2 a_texcoord0 = vec2({}, {});
float a_texcoord0_w = {};
vec2 a_texcoord1 = vec2({}, {});
vec2 a_texcoord2 = vec2({}, {});
vec3 a_view = vec3({}, {}, {});
)",
        laneExpr(0), laneExpr(1), laneExpr(2), laneExpr(3),     // a_coords
        laneExpr(4), laneExpr(5), laneExpr(6), laneExpr(7),     // a_quaternion
        laneExpr(8), laneExpr(9), laneExpr(10), laneExpr(11),   // a_vertexColour
        laneExpr(12), laneExpr(13),                             // a_texcoord0
        laneExpr(16),                                           // a_texcoord0_w
        laneExpr(14), laneExpr(15),                             // a_texcoord1
        laneExpr(22), laneExpr(23),                             // a_texcoord2
        laneExpr(18), laneExpr(19), laneExpr(20)                // a_view
    );

    if (usingUbershader) {
        Helpers::panic("Unimplemented: GetVertexShaderAccelerated for ubershader");
        return picaSource;
    }

    // TODO: Uniforms and don't hardcode fixed-function semantic indices...
    std::string shaderSource = picaSource;
    if (api == API::GLES) {
        shaderSource += "\n#define USING_GLES\n";
    }

    shaderSource += uniformDefinition;

    // Varyings consumed by the fragment shader, plus the opening of main()
    shaderSource += R"(
out vec4 v_quaternion;
out vec4 v_colour;
out vec3 v_texcoord0;
out vec2 v_texcoord1;
out vec3 v_view;
out vec2 v_texcoord2;
#ifndef USING_GLES
out float gl_ClipDistance[2];
#endif
void main() {
pica_shader_main();
)";

    // Pull the fixed-function values out of the vertex shader's output registers
    shaderSource += semantics;

    // Write the varyings and close main()
    shaderSource += R"(
gl_Position = a_coords;
vec4 colourAbs = abs(a_vertexColour);
v_colour = min(colourAbs, vec4(1.f));
v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
v_view = a_view;
v_quaternion = a_quaternion;
#ifndef USING_GLES
gl_ClipDistance[0] = -a_coords.z;
gl_ClipDistance[1] = dot(clipCoords, a_coords);
#endif
})";

    return shaderSource;
}
// Appends the final fragColor assignment and the closing brace of main() to the shader,
// picking the written value from the configured logic op mode.
// Only the GLES API is handled; any other API warns and writes the combiner output unchanged.
void FragmentGenerator::compileLogicOps(std::string& shader, const PICA::FragmentConfig& config) {
    if (api != API::GLES) [[unlikely]] {
        Helpers::warn("Shadergen: Unsupported API for compileLogicOps");
        shader += "fragColor = combinerOutput;\n}";  // End of main function
        return;
    }

    // Expression assigned to fragColor. Copy (and any unimplemented mode) passes the combiner output through.
    const char* outputExpression = "combinerOutput";

    switch (config.outConfig.logicOpMode) {
        case PICA::LogicOpMode::Copy: break;
        case PICA::LogicOpMode::Nop: outputExpression = "fb_color"; break;
        case PICA::LogicOpMode::Clear: outputExpression = "vec4(0.0)"; break;
        case PICA::LogicOpMode::Set: outputExpression = "vec4(1.0)"; break;
        case PICA::LogicOpMode::InvertedCopy:
            outputExpression = "vec4(uvec4(combinerOutput * 255.0) ^ uvec4(0xFFu)) * (1.0 / 255.0)";
            break;

        default: Helpers::warn("Shadergen: Unimplemented logic op mode"); break;
    }

    shader += "fragColor = ";
    shader += outputExpression;
    shader += ";\n}";  // End of main function
}

View file

@ -34,4 +34,5 @@ void PICAShader::reset() {
codeHashDirty = true;
opdescHashDirty = true;
uniformsDirty = true;
}

View file

@ -76,6 +76,7 @@ namespace Audio {
source.reset();
}
mixer.reset();
// Note: Reset audio pipe AFTER resetting all pipes, otherwise the new data will be yeeted
resetAudioPipe();
}
@ -250,6 +251,8 @@ namespace Audio {
source.isBufferIDDirty = false;
}
performMix(read, write);
}
void HLE_DSP::updateSourceConfig(Source& source, HLE::SourceConfiguration::Configuration& config, s16_le* adpcmCoefficients) {
@ -465,6 +468,50 @@ namespace Audio {
}
}
// Applies pending mixer configuration from the read region and stubs the DSP status in the write region.
void HLE_DSP::performMix(Audio::HLE::SharedMemory& readRegion, Audio::HLE::SharedMemory& writeRegion) {
    updateMixerConfig(readRegion);
    // TODO: Do the actual audio mixing

    // Stub the DSP status. It's unknown what the "unknown" field is but Citra sets it to 0, so we do too to be safe
    writeRegion.dspStatus.droppedFrames = 0;
    writeRegion.dspStatus.unknown = 0;
}
// Copies every DSP configuration field flagged as dirty into the mixer state, then clears all dirty flags.
void HLE_DSP::updateMixerConfig(Audio::HLE::SharedMemory& sharedMem) {
    auto& dspConfig = sharedMem.dspConfiguration;

    // Only do work when at least one dirty flag is set
    if (dspConfig.dirtyRaw != 0) {
        if (dspConfig.outputFormatDirty) mixer.channelFormat = dspConfig.outputFormat;

        // Volume slot 0 holds the master volume, slots 1 and 2 hold the two aux volumes
        if (dspConfig.masterVolumeDirty) mixer.volumes[0] = dspConfig.masterVolume;
        if (dspConfig.auxVolume0Dirty) mixer.volumes[1] = dspConfig.auxVolumes[0];
        if (dspConfig.auxVolume1Dirty) mixer.volumes[2] = dspConfig.auxVolumes[1];

        if (dspConfig.auxBusEnable0Dirty) mixer.enableAuxStages[0] = dspConfig.auxBusEnable[0] != 0;
        if (dspConfig.auxBusEnable1Dirty) mixer.enableAuxStages[1] = dspConfig.auxBusEnable[1] != 0;

        // Everything has been consumed, so acknowledge all flags at once
        dspConfig.dirtyRaw = 0;
    }
}
HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) {
SampleBuffer decodedSamples(sampleCount);
@ -585,7 +632,7 @@ namespace Audio {
AAC::Message response;
switch (request.command) {
case AAC::Command::EncodeDecode:
case AAC::Command::EncodeDecode: {
// Dummy response to stop games from hanging
response.resultCode = AAC::ResultCode::Success;
response.decodeResponse.channelCount = 2;
@ -596,10 +643,13 @@ namespace Audio {
response.command = request.command;
response.mode = request.mode;
// We've already got an AAC decoder but it's currently disabled until mixing & output is properly implemented
// TODO: Uncomment this when the time comes
// aacDecoder->decode(response, request, [this](u32 paddr) { return getPointerPhys<u8>(paddr); });
// TODO: Make this a toggle in config.toml. Currently we have it off by default until we finish the DSP mixer.
constexpr bool enableAAC = false;
if (enableAAC) {
aacDecoder->decode(response, request, [this](u32 paddr) { return getPointerPhys<u8>(paddr); });
}
break;
}
case AAC::Command::Init:
case AAC::Command::Shutdown:

View file

@ -136,7 +136,7 @@ void Kernel::mapMemoryBlock() {
break;
case KernelHandles::FontSharedMemHandle:
mem.copySharedFont(ptr);
mem.copySharedFont(ptr, addr);
break;
case KernelHandles::CSNDSharedMemHandle:

View file

@ -7,6 +7,7 @@
#include "config_mem.hpp"
#include "resource_limits.hpp"
#include "services/fonts.hpp"
#include "services/ptm.hpp"
CMRC_DECLARE(ConsoleFonts);
@ -51,7 +52,7 @@ void Memory::reset() {
if (e.handle == KernelHandles::FontSharedMemHandle) {
// Read font size from the cmrc filesystem the font is stored in
auto fonts = cmrc::ConsoleFonts::get_filesystem();
e.size = fonts.open("CitraSharedFontUSRelocated.bin").size();
e.size = fonts.open("SharedFontReplacement.bin").size();
}
e.mapped = false;
@ -520,10 +521,13 @@ Regions Memory::getConsoleRegion() {
return region;
}
void Memory::copySharedFont(u8* pointer) {
void Memory::copySharedFont(u8* pointer, u32 vaddr) {
auto fonts = cmrc::ConsoleFonts::get_filesystem();
auto font = fonts.open("CitraSharedFontUSRelocated.bin");
auto font = fonts.open("SharedFontReplacement.bin");
std::memcpy(pointer, font.begin(), font.size());
// Relocate shared font to the address it's being loaded to
HLE::Fonts::relocateSharedFont(pointer, vaddr);
}
std::optional<u64> Memory::getProgramID() {

View file

@ -73,10 +73,7 @@ void GLStateManager::resetVAO() {
}
// Unbinds the array and uniform buffer targets and resets the cached binding for each to 0.
void GLStateManager::resetBuffers() {
    // Vertex buffer binding
    boundVBO = 0;
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    // Uniform buffer binding
    boundUBO = 0;
    glBindBuffer(GL_UNIFORM_BUFFER, 0);
}

View file

@ -2,13 +2,16 @@
#include <stb_image_write.h>
#include <bit>
#include <cmrc/cmrc.hpp>
#include "config.hpp"
#include "PICA/float_types.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/gpu.hpp"
#include "PICA/pica_frag_uniforms.hpp"
#include "PICA/pica_simd.hpp"
#include "PICA/regs.hpp"
#include "PICA/shader_decompiler.hpp"
#include "config.hpp"
#include "math_util.hpp"
CMRC_DECLARE(RendererGL);
@ -24,7 +27,7 @@ void RendererGL::reset() {
colourBufferCache.reset();
textureCache.reset();
clearShaderCache();
shaderCache.clear();
// Init the colour/depth buffer settings to some random defaults on reset
colourBufferLoc = 0;
@ -77,40 +80,56 @@ void RendererGL::initGraphicsContextInternal() {
gl.useProgram(displayProgram);
glUniform1i(OpenGL::uniformLocation(displayProgram, "u_texture"), 0); // Init sampler object
// Create stream buffers for vertex, index and uniform buffers
static constexpr usize hwIndexBufferSize = 2_MB;
static constexpr usize hwVertexBufferSize = 16_MB;
hwIndexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, hwIndexBufferSize);
hwVertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, hwVertexBufferSize);
// Allocate memory for the shadergen fragment uniform UBO
glGenBuffers(1, &shadergenFragmentUBO);
gl.bindUBO(shadergenFragmentUBO);
glBufferData(GL_UNIFORM_BUFFER, sizeof(PICA::FragmentUniforms), nullptr, GL_DYNAMIC_DRAW);
vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize, GL_STREAM_DRAW);
gl.bindVBO(vbo);
vao.create();
gl.bindVAO(vao);
// Allocate memory for the accelerated vertex shader uniform UBO
glGenBuffers(1, &hwShaderUniformUBO);
gl.bindUBO(hwShaderUniformUBO);
glBufferData(GL_UNIFORM_BUFFER, PICAShader::totalUniformSize(), nullptr, GL_DYNAMIC_DRAW);
vbo.createFixedSize(sizeof(Vertex) * vertexBufferSize * 2, GL_STREAM_DRAW);
vbo.bind();
// Initialize the VAO used when not using hw shaders
defaultVAO.create();
gl.bindVAO(defaultVAO);
// Position (x, y, z, w) attributes
vao.setAttributeFloat<float>(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions));
vao.enableAttribute(0);
defaultVAO.setAttributeFloat<float>(0, 4, sizeof(Vertex), offsetof(Vertex, s.positions));
defaultVAO.enableAttribute(0);
// Quaternion attribute
vao.setAttributeFloat<float>(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion));
vao.enableAttribute(1);
defaultVAO.setAttributeFloat<float>(1, 4, sizeof(Vertex), offsetof(Vertex, s.quaternion));
defaultVAO.enableAttribute(1);
// Colour attribute
vao.setAttributeFloat<float>(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour));
vao.enableAttribute(2);
defaultVAO.setAttributeFloat<float>(2, 4, sizeof(Vertex), offsetof(Vertex, s.colour));
defaultVAO.enableAttribute(2);
// UV 0 attribute
vao.setAttributeFloat<float>(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0));
vao.enableAttribute(3);
defaultVAO.setAttributeFloat<float>(3, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord0));
defaultVAO.enableAttribute(3);
// UV 1 attribute
vao.setAttributeFloat<float>(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1));
vao.enableAttribute(4);
defaultVAO.setAttributeFloat<float>(4, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord1));
defaultVAO.enableAttribute(4);
// UV 0 W-component attribute
vao.setAttributeFloat<float>(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w));
vao.enableAttribute(5);
defaultVAO.setAttributeFloat<float>(5, 1, sizeof(Vertex), offsetof(Vertex, s.texcoord0_w));
defaultVAO.enableAttribute(5);
// View
vao.setAttributeFloat<float>(6, 3, sizeof(Vertex), offsetof(Vertex, s.view));
vao.enableAttribute(6);
defaultVAO.setAttributeFloat<float>(6, 3, sizeof(Vertex), offsetof(Vertex, s.view));
defaultVAO.enableAttribute(6);
// UV 2 attribute
vao.setAttributeFloat<float>(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2));
vao.enableAttribute(7);
defaultVAO.setAttributeFloat<float>(7, 2, sizeof(Vertex), offsetof(Vertex, s.texcoord2));
defaultVAO.enableAttribute(7);
// Initialize the VAO used for hw shaders
hwShaderVAO.create();
dummyVBO.create();
dummyVAO.create();
@ -165,8 +184,18 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::clearColor();
OpenGL::setViewport(oldViewport[0], oldViewport[1], oldViewport[2], oldViewport[3]);
// Initialize fixed attributes
for (int i = 0; i < fixedAttrValues.size(); i++) {
fixedAttrValues[i] = {0.f, 0.f, 0.f, 0.f};
glVertexAttrib4f(i, 0.0, 0.0, 0.0, 0.0);
}
reset();
// Populate our driver info structure
driverInfo.supportsExtFbFetch = (GLAD_GL_EXT_shader_framebuffer_fetch != 0);
driverInfo.supportsArmFbFetch = (GLAD_GL_ARM_shader_framebuffer_fetch != 0);
// Initialize the default vertex shader used with shadergen
std::string defaultShadergenVSSource = fragShaderGen.getDefaultVertexShader();
defaultShadergenVs.create({defaultShadergenVSSource.c_str(), defaultShadergenVSSource.size()}, OpenGL::Vertex);
@ -414,29 +443,14 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
OpenGL::Triangle,
};
bool usingUbershader = enableUbershader;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using at least N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
if (usingUbershader) {
gl.useProgram(triangleProgram);
} else {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
}
const auto primitiveTopology = primTypes[static_cast<usize>(primType)];
gl.disableScissor();
gl.bindVBO(vbo);
gl.bindVAO(vao);
// If we're using accelerated shaders, the hw VAO, VBO and EBO objects will have already been bound in prepareForDraw
if (!usingAcceleratedShader) {
vbo.bind();
gl.bindVAO(defaultVAO);
}
gl.enableClipPlane(0); // Clipping plane 0 is always enabled
if (regs[PICA::InternalRegs::ClipEnable] & 1) {
@ -454,38 +468,9 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
const int depthFunc = getBits<4, 3>(depthControl);
const int colourMask = getBits<8, 4>(depthControl);
gl.setColourMask(colourMask & 1, colourMask & 2, colourMask & 4, colourMask & 8);
static constexpr std::array<GLenum, 8> depthModes = {GL_NEVER, GL_ALWAYS, GL_EQUAL, GL_NOTEQUAL, GL_LESS, GL_LEQUAL, GL_GREATER, GL_GEQUAL};
// Update ubershader uniforms
if (usingUbershader) {
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
bindTexturesToSlots();
if (gpu.fogLUTDirty) {
updateFogLUT();
}
@ -528,8 +513,32 @@ void RendererGL::drawVertices(PICA::PrimType primType, std::span<const Vertex> v
setupStencilTest(stencilEnable);
vbo.bufferVertsSub(vertices);
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
if (!usingAcceleratedShader) {
vbo.bufferVertsSub(vertices);
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
} else {
if (performIndexedRender) {
// When doing indexed rendering, use glDrawRangeElementsBaseVertex to issue the indexed draw
hwIndexBuffer->Bind();
if (glDrawRangeElementsBaseVertex != nullptr) [[likely]] {
glDrawRangeElementsBaseVertex(
primitiveTopology, minimumIndex, maximumIndex, GLsizei(vertices.size()), usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
hwIndexBufferOffset, -GLint(minimumIndex)
);
} else {
// If glDrawRangeElementsBaseVertex is not available then prepareForDraw will have subtracted the base vertex from the index buffer
// for us, so just use glDrawRangeElements
glDrawRangeElements(
primitiveTopology, 0, GLint(maximumIndex - minimumIndex), GLsizei(vertices.size()),
usingShortIndices ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE, hwIndexBufferOffset
);
}
} else {
// When doing non-indexed rendering, just use glDrawArrays
OpenGL::draw(primitiveTopology, GLsizei(vertices.size()));
}
}
}
void RendererGL::display() {
@ -836,34 +845,53 @@ std::optional<ColourBuffer> RendererGL::getColourBuffer(u32 addr, PICA::ColorFmt
}
OpenGL::Program& RendererGL::getSpecializedShader() {
constexpr uint uboBlockBinding = 2;
constexpr uint vsUBOBlockBinding = 1;
constexpr uint fsUBOBlockBinding = 2;
PICA::FragmentConfig fsConfig(regs);
// If we're not on GLES, ignore the logic op configuration and don't generate redundant shaders for it, since we use hw logic ops
#ifndef USING_GLES
fsConfig.outConfig.logicOpMode = PICA::LogicOpMode(0);
#endif
CachedProgram& programEntry = shaderCache[fsConfig];
OpenGL::Shader& fragShader = shaderCache.fragmentShaderCache[fsConfig];
if (!fragShader.exists()) {
std::string fs = fragShaderGen.generate(fsConfig);
fragShader.create({fs.c_str(), fs.size()}, OpenGL::Fragment);
}
// Get the handle of the current vertex shader
OpenGL::Shader& vertexShader = usingAcceleratedShader ? *generatedVertexShader : defaultShadergenVs;
// And form the key for looking up a shader program
const u64 programKey = (u64(vertexShader.handle()) << 32) | u64(fragShader.handle());
CachedProgram& programEntry = shaderCache.programCache[programKey];
OpenGL::Program& program = programEntry.program;
if (!program.exists()) {
std::string fs = fragShaderGen.generate(fsConfig);
OpenGL::Shader fragShader({fs.c_str(), fs.size()}, OpenGL::Fragment);
program.create({defaultShadergenVs, fragShader});
program.create({vertexShader, fragShader});
gl.useProgram(program);
fragShader.free();
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
// Set up the binding for our UBO. Sadly we can't specify it in the shader like normal people,
// Set up the binding for our UBOs. Sadly we can't specify it in the shader like normal people,
// As it's an OpenGL 4.2 feature that MacOS doesn't support...
uint uboIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), uboIndex, uboBlockBinding);
uint fsUBOIndex = glGetUniformBlockIndex(program.handle(), "FragmentUniforms");
glUniformBlockBinding(program.handle(), fsUBOIndex, fsUBOBlockBinding);
if (usingAcceleratedShader) {
uint vertexUBOIndex = glGetUniformBlockIndex(program.handle(), "PICAShaderUniforms");
glUniformBlockBinding(program.handle(), vertexUBOIndex, vsUBOBlockBinding);
}
}
glBindBufferBase(GL_UNIFORM_BUFFER, fsUBOBlockBinding, shadergenFragmentUBO);
if (usingAcceleratedShader) {
glBindBufferBase(GL_UNIFORM_BUFFER, vsUBOBlockBinding, hwShaderUniformUBO);
}
glBindBufferBase(GL_UNIFORM_BUFFER, uboBlockBinding, shadergenFragmentUBO);
// Upload uniform data to our shader's UBO
PICA::FragmentUniforms uniforms;
@ -953,6 +981,101 @@ OpenGL::Program& RendererGL::getSpecializedShader() {
return program;
}
// Selects and binds the shader program for the upcoming draw and reports whether the draw will run with a
// hardware-accelerated (recompiled) vertex shader.
//
// Steps, in order:
//  1. Decide whether the ubershader is used (config option, overridden by the lighting threshold check).
//  2. Decide whether the vertex shader can be accelerated. This requires the config option to be on, the ubershader
//     to be off, and a non-null accel whose canBeAccelerated flag is set.
//  3. If so, look up (or recompile and cache) the vertex shader. On failure, fall back to non-accelerated rendering.
//     On success, upload dirty uniforms, record the index range and call accelerateVertexUpload.
//  4. Bind either the specialized program or the ubershader program (and refresh the ubershader uniforms).
//
// shaderUnit: provides the vertex shader (shaderUnit.vs) that is recompiled and whose uniforms are uploaded.
// accel: draw acceleration info; may be nullptr, in which case acceleration is disabled for this draw.
// Returns the final value of usingAcceleratedShader.
bool RendererGL::prepareForDraw(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) {
// First we figure out if we will be using an ubershader
bool usingUbershader = emulatorConfig->useUbershaders;
if (usingUbershader) {
const bool lightsEnabled = (regs[InternalRegs::LightingEnable] & 1) != 0;
const uint lightCount = (regs[InternalRegs::LightNumber] & 0x7) + 1;
// Emulating lights in the ubershader is incredibly slow, so we've got an option to render draws using at least N lights via shadergen
// This way we generate fewer shaders overall than with full shadergen, but don't tank performance
if (emulatorConfig->forceShadergenForLights && lightsEnabled && lightCount >= emulatorConfig->lightShadergenThreshold) {
usingUbershader = false;
}
}
// Then we figure out if we will use hw accelerated shaders, and try to fetch our shader
// TODO: Ubershader support for accelerated shaders
usingAcceleratedShader = emulatorConfig->accelerateShaders && !usingUbershader && accel != nullptr && accel->canBeAccelerated;
if (usingAcceleratedShader) {
PICA::VertConfig vertexConfig(shaderUnit.vs, regs, usingUbershader);
// Note: operator[] creates an empty optional the first time a given config is seen
std::optional<OpenGL::Shader>& shader = shaderCache.vertexShaderCache[vertexConfig];
// If the optional is empty, we have never tried to recompile the shader before. Try to recompile it and see if it works.
if (!shader.has_value()) {
// Initialize shader to a "null" shader (handle == 0)
// This also marks the config as "already attempted", so a failed recompilation is not retried on later draws
shader = OpenGL::Shader();
std::string picaShaderSource = PICA::ShaderGen::decompileShader(
shaderUnit.vs, *emulatorConfig, shaderUnit.vs.entrypoint,
Helpers::isAndroid() ? PICA::ShaderGen::API::GLES : PICA::ShaderGen::API::GL, PICA::ShaderGen::Language::GLSL
);
// Empty source means compilation error, if the source is not empty then we convert the recompiled PICA code into a valid shader and upload
// it to the GPU
if (!picaShaderSource.empty()) {
std::string vertexShaderSource = fragShaderGen.getVertexShaderAccelerated(picaShaderSource, vertexConfig, usingUbershader);
shader->create({vertexShaderSource}, OpenGL::Vertex);
}
}
// Shader generation did not work out, so set usingAcceleratedShader to false
if (!shader->exists()) {
usingAcceleratedShader = false;
} else {
// Remember which vertex shader to use for this draw
generatedVertexShader = &(*shader);
gl.bindUBO(hwShaderUniformUBO);
// Only re-upload the vertex shader uniforms when they are flagged as dirty
if (shaderUnit.vs.uniformsDirty) {
shaderUnit.vs.uniformsDirty = false;
glBufferSubData(GL_UNIFORM_BUFFER, 0, PICAShader::totalUniformSize(), shaderUnit.vs.getUniformPointer());
}
// Record whether the draw is indexed and its index range for later use
performIndexedRender = accel->indexed;
minimumIndex = GLsizei(accel->minimumIndex);
maximumIndex = GLsizei(accel->maximumIndex);
// Upload vertex data and index buffer data to our GPU
accelerateVertexUpload(shaderUnit, accel);
}
}
if (!usingUbershader) {
OpenGL::Program& program = getSpecializedShader();
gl.useProgram(program);
} else { // Bind ubershader & load ubershader uniforms
gl.useProgram(triangleProgram);
const float depthScale = f24::fromRaw(regs[PICA::InternalRegs::DepthScale] & 0xffffff).toFloat32();
const float depthOffset = f24::fromRaw(regs[PICA::InternalRegs::DepthOffset] & 0xffffff).toFloat32();
const bool depthMapEnable = regs[PICA::InternalRegs::DepthmapEnable] & 1;
// The depth uniforms are cached in the old* members so they are only re-sent when their value changes
if (oldDepthScale != depthScale) {
oldDepthScale = depthScale;
glUniform1f(ubershaderData.depthScaleLoc, depthScale);
}
if (oldDepthOffset != depthOffset) {
oldDepthOffset = depthOffset;
glUniform1f(ubershaderData.depthOffsetLoc, depthOffset);
}
if (oldDepthmapEnable != depthMapEnable) {
oldDepthmapEnable = depthMapEnable;
glUniform1i(ubershaderData.depthmapEnableLoc, depthMapEnable);
}
// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x48)
// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
glUniform1uiv(ubershaderData.picaRegLoc, 0x200 - 0x48, &regs[0x48]);
setupUbershaderTexEnv();
}
return usingAcceleratedShader;
}
void RendererGL::screenshot(const std::string& name) {
constexpr uint width = 400;
constexpr uint height = 2 * 240;
@ -966,7 +1089,7 @@ void RendererGL::screenshot(const std::string& name) {
// Flip the image vertically
for (int y = 0; y < height; y++) {
memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4);
std::memcpy(&flippedPixels[y * width * 4], &pixels[(height - y - 1) * width * 4], width * 4);
// Swap R and B channels
for (int x = 0; x < width; x++) {
std::swap(flippedPixels[y * width * 4 + x * 4 + 0], flippedPixels[y * width * 4 + x * 4 + 2]);
@ -978,21 +1101,12 @@ void RendererGL::screenshot(const std::string& name) {
stbi_write_png(name.c_str(), width, height, 4, flippedPixels.data(), 0);
}
// Frees the program held by every cached entry, then empties the shader cache.
void RendererGL::clearShaderCache() {
    for (auto& entry : shaderCache) {
        // entry.second is the cached value; its program must be freed before the entry is dropped
        entry.second.program.free();
    }

    shaderCache.clear();
}
void RendererGL::deinitGraphicsContext() {
// Invalidate all surface caches since they'll no longer be valid
textureCache.reset();
depthBufferCache.reset();
colourBufferCache.reset();
clearShaderCache();
shaderCache.clear();
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
@ -1041,3 +1155,99 @@ void RendererGL::initUbershader(OpenGL::Program& program) {
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_luts"), 3);
}
// Streams the vertex data (and, for indexed draws, the index data) described by `accel` into
// hwVertexBuffer / hwIndexBuffer and configures hwShaderVAO so attributes are fetched from it.
//
// shaderUnit: not read by this function.
// accel:      description of the draw. Fields read here: indexed, useShortIndices, indexBuffer,
//             minimumIndex, vertexDataSize, enabledAttributeMask, totalLoaderCount, loaders,
//             fixedAttributes and attributeInfo.
//
// Side effects: updates usingShortIndices, hwIndexBufferOffset, previousAttributeMask and
// fixedAttrValues, and leaves hwVertexBuffer and hwShaderVAO bound.
void RendererGL::accelerateVertexUpload(ShaderUnit& shaderUnit, PICA::DrawAcceleration* accel) {
    // Indexed by the `type` field of each attribute
    static constexpr GLenum attributeFormats[4] = {
        GL_BYTE,           // 0: Signed byte
        GL_UNSIGNED_BYTE,  // 1: Unsigned byte
        GL_SHORT,          // 2: Short
        GL_FLOAT,          // 3: Float
    };

    // Update index buffer if necessary
    if (accel->indexed) {
        usingShortIndices = accel->useShortIndices;
        const u32 indexCount = regs[PICA::InternalRegs::VertexCountReg];
        const usize indexBufferSize = indexCount * (usingShortIndices ? sizeof(u16) : sizeof(u8));

        hwIndexBuffer->Bind();
        // NOTE(review): the first Map argument is presumably the required alignment - confirm against the stream buffer class
        auto indexBufferRes = hwIndexBuffer->Map(4, indexBufferSize);
        // Remember where in the buffer this draw's indices were placed
        hwIndexBufferOffset = reinterpret_cast<void*>(usize(indexBufferRes.buffer_offset));

        std::memcpy(indexBufferRes.pointer, accel->indexBuffer, indexBufferSize);
        // If we don't have glDrawRangeElementsBaseVertex, we must subtract the base index value from our index buffer manually
        if (glDrawRangeElementsBaseVertex == nullptr) [[unlikely]] {
            u8* indices = static_cast<u8*>(indexBufferRes.pointer);
            if (usingShortIndices) {
                PICA::IndexBuffer::subtractBaseIndex<true>(indices, indexCount, accel->minimumIndex);
            } else {
                PICA::IndexBuffer::subtractBaseIndex<false>(indices, indexCount, accel->minimumIndex);
            }
        }

        hwIndexBuffer->Unmap(indexBufferSize);
    }

    hwVertexBuffer->Bind();
    auto vertexBufferRes = hwVertexBuffer->Map(4, accel->vertexDataSize);
    u8* vertexData = static_cast<u8*>(vertexBufferRes.pointer);
    const u32 vertexBufferOffset = vertexBufferRes.buffer_offset;

    gl.bindVAO(hwShaderVAO);

    // Enable or disable vertex attributes as needed
    const u32 currentAttributeMask = accel->enabledAttributeMask;
    // Use bitwise xor to calculate which attributes changed
    u32 attributeMaskDiff = currentAttributeMask ^ previousAttributeMask;

    while (attributeMaskDiff != 0) {
        // Get the index of the highest attribute whose state changed and clear its bit in the diff
        const u32 index = 31 - std::countl_zero<u32>(attributeMaskDiff);
        const u32 mask = 1u << index;
        attributeMaskDiff ^= mask;

        if ((currentAttributeMask & mask) != 0) {
            // Attribute was disabled and is now enabled
            hwShaderVAO.enableAttribute(index);
        } else {
            // Attribute was enabled and is now disabled
            hwShaderVAO.disableAttribute(index);
        }
    }

    previousAttributeMask = currentAttributeMask;

    // Upload the data for each (enabled) attribute loader into our vertex buffer, back to back
    for (int i = 0; i < accel->totalLoaderCount; i++) {
        auto& loader = accel->loaders[i];

        std::memcpy(vertexData, loader.data, loader.size);
        vertexData += loader.size;
    }

    hwVertexBuffer->Unmap(accel->vertexDataSize);

    // Iterate over the 16 PICA input registers and configure how they should be fetched.
    for (int i = 0; i < 16; i++) {
        const auto& attrib = accel->attributeInfo[i];
        const u32 attributeMask = 1u << i;

        if (accel->fixedAttributes & attributeMask) {
            auto& attrValue = fixedAttrValues[i];
            // This is a fixed attribute, so set its fixed value, but only if it actually needs to be updated
            if (attrValue[0] != attrib.fixedValue[0] || attrValue[1] != attrib.fixedValue[1] || attrValue[2] != attrib.fixedValue[2] ||
                attrValue[3] != attrib.fixedValue[3]) {
                std::memcpy(attrValue.data(), attrib.fixedValue.data(), sizeof(attrib.fixedValue));
                glVertexAttrib4f(i, attrib.fixedValue[0], attrib.fixedValue[1], attrib.fixedValue[2], attrib.fixedValue[3]);
            }
        } else if (accel->enabledAttributeMask & attributeMask) {
            // The attribute's offset is relative to the start of this draw's data, so rebase it onto the mapped region
            glVertexAttribPointer(
                i, attrib.componentCount, attributeFormats[attrib.type], GL_FALSE, attrib.stride,
                reinterpret_cast<GLvoid*>(vertexBufferOffset + attrib.offset)
            );
        }
    }
}

109
src/core/services/fonts.cpp Normal file
View file

@ -0,0 +1,109 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Adapted from https://github.com/PabloMK7/citra/blob/master/src/core/hle/service/apt/bcfnt/bcfnt.cpp
#include "services/fonts.hpp"
#include <cstring>
namespace HLE::Fonts {
    // Rewrites the offsets stored inside a shared font blob so that they are based on newAddress.
    //
    // sharedFont: start of the shared font data; the CFNT header is read from offset 0x80 and the
    //             sections follow it after cfnt.headerSize bytes. The data is patched in place.
    // newAddress: base the stored offsets should be relative to after relocation.
    //
    // The base the file was previously built for is auto-detected by comparing the offsets recorded
    // in the FINF section against where the first CMAP/CWDH/TGLP sections are actually found.
    void relocateSharedFont(u8* sharedFont, u32 newAddress) {
        constexpr u32 sharedFontStartOffset = 0x80;

        // True when the section header carries the given 4-character magic
        const auto hasMagic = [](const SectionHeader& header, const char* tag) { return std::memcmp(header.magic, tag, 4) == 0; };

        CFNT cfnt;
        std::memcpy(&cfnt, &sharedFont[sharedFontStartOffset], sizeof(cfnt));

        // Offsets the FINF section claims for each section type
        u32 assumedCmapOffset = 0;
        u32 assumedCwdhOffset = 0;
        u32 assumedTglpOffset = 0;
        // Offsets at which the first section of each type is really located
        u32 firstCmapOffset = 0;
        u32 firstCwdhOffset = 0;
        u32 firstTglpOffset = 0;

        // Pass 1: locate the sections so that the rebase offset can be auto-detected
        u32 currentOffset = sharedFontStartOffset + cfnt.headerSize;
        for (uint block = 0; block < cfnt.numBlocks; ++block) {
            const u8* sectionStart = &sharedFont[currentOffset];

            SectionHeader sectionHeader;
            std::memcpy(&sectionHeader, sectionStart, sizeof(sectionHeader));

            if (firstCmapOffset == 0 && hasMagic(sectionHeader, "CMAP")) {
                firstCmapOffset = currentOffset;
            } else if (firstCwdhOffset == 0 && hasMagic(sectionHeader, "CWDH")) {
                firstCwdhOffset = currentOffset;
            } else if (firstTglpOffset == 0 && hasMagic(sectionHeader, "TGLP")) {
                firstTglpOffset = currentOffset;
            } else if (hasMagic(sectionHeader, "FINF")) {
                Fonts::FINF finf;
                std::memcpy(&finf, sectionStart, sizeof(finf));

                // The offsets kept in FINF are sizeof(SectionHeader) past the start of the section they refer to
                assumedCmapOffset = finf.cmapOffset - sizeof(SectionHeader);
                assumedCwdhOffset = finf.cwdhOffset - sizeof(SectionHeader);
                assumedTglpOffset = finf.tglpOffset - sizeof(SectionHeader);
            }

            currentOffset += sectionHeader.sectionSize;
        }

        // All three section types should agree on the base the file was built for
        u32 previousBase = assumedCmapOffset - firstCmapOffset;
        const bool cwdhDisagrees = previousBase != assumedCwdhOffset - firstCwdhOffset;
        const bool tglpDisagrees = previousBase != assumedTglpOffset - firstTglpOffset;
        if (cwdhDisagrees || tglpDisagrees) {
            Helpers::warn("You shouldn't be seeing this. Shared Font file offsets might be borked?");
        }

        u32 offset = newAddress - previousBase;

        // Pass 2: walk the sections again from the start and apply the rebase
        currentOffset = sharedFontStartOffset + cfnt.headerSize;
        for (uint block = 0; block < cfnt.numBlocks; ++block) {
            u8* sectionStart = &sharedFont[currentOffset];

            SectionHeader sectionHeader;
            std::memcpy(&sectionHeader, sectionStart, sizeof(sectionHeader));

            if (hasMagic(sectionHeader, "FINF")) {
                // FINF stores the offsets of the first CMAP, CWDH and TGLP sections
                Fonts::FINF finf;
                std::memcpy(&finf, sectionStart, sizeof(finf));

                finf.cmapOffset += offset;
                finf.cwdhOffset += offset;
                finf.tglpOffset += offset;

                std::memcpy(sectionStart, &finf, sizeof(finf));
            } else if (hasMagic(sectionHeader, "CMAP")) {
                // CMAP sections are chained; a next offset of 0 is left untouched
                Fonts::CMAP cmap;
                std::memcpy(&cmap, sectionStart, sizeof(cmap));

                if (cmap.nextCmapOffset != 0) {
                    cmap.nextCmapOffset += offset;
                }

                std::memcpy(sectionStart, &cmap, sizeof(cmap));
            } else if (hasMagic(sectionHeader, "CWDH")) {
                // CWDH sections are chained the same way
                Fonts::CWDH cwdh;
                std::memcpy(&cwdh, sectionStart, sizeof(cwdh));

                if (cwdh.nextCwdhOffset != 0) {
                    cwdh.nextCwdhOffset += offset;
                }

                std::memcpy(sectionStart, &cwdh, sizeof(cwdh));
            } else if (hasMagic(sectionHeader, "TGLP")) {
                // TGLP stores the offset of its sheet data
                Fonts::TGLP tglp;
                std::memcpy(&tglp, sectionStart, sizeof(tglp));

                tglp.sheetDataOffset += offset;

                std::memcpy(sectionStart, &tglp, sizeof(tglp));
            }

            currentOffset += sectionHeader.sectionSize;
        }
    }
}  // namespace HLE::Fonts

View file

@ -8,6 +8,7 @@
#include "renderer_gl/renderer_gl.hpp"
#include "services/hid.hpp"
#include "android_utils.hpp"
#include "sdl_sensors.hpp"
std::unique_ptr<Emulator> emulator = nullptr;
HIDService* hidService = nullptr;
@ -43,6 +44,7 @@ extern "C" {
AlberFunction(void, functionName) (JNIEnv* env, jobject obj, type value) { emulator->getConfig().settingName = value; }
MAKE_SETTING(setShaderJitEnabled, jboolean, shaderJitEnabled)
MAKE_SETTING(setAccurateShaderMulEnable, jboolean, accurateShaderMul)
#undef MAKE_SETTING
@ -87,6 +89,7 @@ AlberFunction(void, Finalize)(JNIEnv* env, jobject obj) {
emulator = nullptr;
hidService = nullptr;
renderer = nullptr;
romLoaded = false;
}
// JNI entry point: reports the current value of the romLoaded flag to the Java side.
AlberFunction(jboolean, HasRomLoaded)(JNIEnv* env, jobject obj) { return romLoaded; }
@ -110,6 +113,19 @@ AlberFunction(void, TouchScreenUp)(JNIEnv* env, jobject obj) { hidService->relea
// JNI entry point: forwards a key release to the HID service.
AlberFunction(void, KeyUp)(JNIEnv* env, jobject obj, jint keyCode) {
    const u32 key = static_cast<u32>(keyCode);
    hidService->releaseKey(key);
}
// JNI entry point: forwards a key press to the HID service.
AlberFunction(void, KeyDown)(JNIEnv* env, jobject obj, jint keyCode) {
    const u32 key = static_cast<u32>(keyCode);
    hidService->pressKey(key);
}
// JNI entry point: converts a gyroscope sample with Sensors::SDL::convertRotation and hands the
// result to the HID service. The converted x, y and z components are applied as pitch, roll and
// yaw respectively, each truncated to s16.
AlberFunction(void, SetGyro)(JNIEnv* env, jobject obj, jfloat roll, jfloat pitch, jfloat yaw) {
    auto converted = Sensors::SDL::convertRotation({static_cast<float>(roll), static_cast<float>(pitch), static_cast<float>(yaw)});

    hidService->setPitch(static_cast<s16>(converted.x));
    hidService->setRoll(static_cast<s16>(converted.y));
    hidService->setYaw(static_cast<s16>(converted.z));
}
// JNI entry point: converts an accelerometer sample with Sensors::SDL::convertAcceleration and
// hands the converted x/y/z values to the HID service.
AlberFunction(void, SetAccel)(JNIEnv* env, jobject obj, jfloat rawX, jfloat rawY, jfloat rawZ) {
    // convertAcceleration takes the sample as an array of three floats
    float sample[3] = {static_cast<float>(rawX), static_cast<float>(rawY), static_cast<float>(rawZ)};
    auto converted = Sensors::SDL::convertAcceleration(sample);

    hidService->setAccel(converted.x, converted.y, converted.z);
}
AlberFunction(void, SetCirclepadAxis)(JNIEnv* env, jobject obj, jint x, jint y) {
hidService->setCirclepadX((s16)x);
hidService->setCirclepadY((s16)y);
@ -139,4 +155,4 @@ int AndroidUtils::openDocument(const char* path, const char* perms) {
env->DeleteLocalRef(jmode);
return (int)result;
}
}

View file

@ -163,13 +163,14 @@ static int fetchVariableRange(std::string key, int min, int max) {
static void configInit() {
static const retro_variable values[] = {
{"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled"
: "Enable shader JIT; disabled|enabled"},
{"panda3ds_use_shader_jit", EmulatorConfig::shaderJitDefault ? "Enable shader JIT; enabled|disabled" : "Enable shader JIT; disabled|enabled"},
{"panda3ds_accelerate_shaders",
EmulatorConfig::accelerateShadersDefault ? "Run 3DS shaders on the GPU; enabled|disabled" : "Run 3DS shaders on the GPU; disabled|enabled"},
{"panda3ds_accurate_shader_mul", "Enable accurate shader multiplication; disabled|enabled"},
{"panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault ? "Use ubershaders (No stutter, maybe slower); enabled|disabled"
: "Use ubershaders (No stutter, maybe slower); disabled|enabled"},
{"panda3ds_use_vsync", "Enable VSync; enabled|disabled"},
{"panda3ds_dsp_emulation", "DSP emulation; Null|HLE|LLE"},
{"panda3ds_dsp_emulation", "DSP emulation; HLE|LLE|Null"},
{"panda3ds_use_audio", "Enable audio; disabled|enabled"},
{"panda3ds_use_virtual_sd", "Enable virtual SD card; enabled|disabled"},
{"panda3ds_write_protect_virtual_sd", "Write protect virtual SD card; disabled|enabled"},
@ -197,6 +198,8 @@ static void configUpdate() {
config.sdWriteProtected = fetchVariableBool("panda3ds_write_protect_virtual_sd", false);
config.accurateShaderMul = fetchVariableBool("panda3ds_accurate_shader_mul", false);
config.useUbershaders = fetchVariableBool("panda3ds_use_ubershader", EmulatorConfig::ubershaderDefault);
config.accelerateShaders = fetchVariableBool("panda3ds_accelerate_shaders", EmulatorConfig::accelerateShadersDefault);
config.forceShadergenForLights = fetchVariableBool("panda3ds_ubershader_lighting_override", true);
config.lightShadergenThreshold = fetchVariableRange("panda3ds_ubershader_lighting_override_threshold", 1, 8);
config.discordRpcEnabled = false;

View file

@ -130,6 +130,32 @@ MAKE_MEMORY_FUNCTIONS(32)
MAKE_MEMORY_FUNCTIONS(64)
#undef MAKE_MEMORY_FUNCTIONS
// Lua thunk: reads 32 bits from the emulated address given as argument 1, reinterprets them as a
// float and pushes the value. Returns 1 (one Lua result).
static int readFloatThunk(lua_State* L) {
    const u32 address = static_cast<u32>(lua_tonumber(L, 1));
    const u32 rawBits = LuaManager::g_emulator->getMemory().read32(address);

    lua_pushnumber(L, static_cast<lua_Number>(Helpers::bit_cast<float, u32>(rawBits)));
    return 1;
}
// Lua thunk: converts argument 2 to a float and stores its 32-bit pattern at the emulated address
// given as argument 1. Returns 0 (no Lua results).
static int writeFloatThunk(lua_State* L) {
    const u32 address = static_cast<u32>(lua_tonumber(L, 1));
    const float number = static_cast<float>(lua_tonumber(L, 2));
    const u32 rawBits = Helpers::bit_cast<u32, float>(number);

    LuaManager::g_emulator->getMemory().write32(address, rawBits);
    return 0;
}
// Lua thunk: reads 64 bits from the emulated address given as argument 1, reinterprets them as a
// double and pushes the value. Returns 1 (one Lua result).
static int readDoubleThunk(lua_State* L) {
    const u32 address = static_cast<u32>(lua_tonumber(L, 1));
    const u64 rawBits = LuaManager::g_emulator->getMemory().read64(address);

    lua_pushnumber(L, static_cast<lua_Number>(Helpers::bit_cast<double, u64>(rawBits)));
    return 1;
}
// Lua thunk: stores the 64-bit pattern of argument 2 (as a double) at the emulated address given
// as argument 1. Returns 0 (no Lua results).
static int writeDoubleThunk(lua_State* L) {
    const u32 address = static_cast<u32>(lua_tonumber(L, 1));
    const double number = static_cast<double>(lua_tonumber(L, 2));
    const u64 rawBits = Helpers::bit_cast<u64, double>(number);

    LuaManager::g_emulator->getMemory().write64(address, rawBits);
    return 0;
}
static int getAppIDThunk(lua_State* L) {
std::optional<u64> id = LuaManager::g_emulator->getMemory().getProgramID();
@ -248,10 +274,14 @@ static constexpr luaL_Reg functions[] = {
{ "__read16", read16Thunk },
{ "__read32", read32Thunk },
{ "__read64", read64Thunk },
{ "__readFloat", readFloatThunk },
{ "__readDouble", readDoubleThunk },
{ "__write8", write8Thunk} ,
{ "__write16", write16Thunk },
{ "__write32", write32Thunk },
{ "__write64", write64Thunk },
{ "__writeFloat", writeFloatThunk },
{ "__writeDouble", writeDoubleThunk },
{ "__getAppID", getAppIDThunk },
{ "__pause", pauseThunk },
{ "__resume", resumeThunk },
@ -273,10 +303,15 @@ void LuaManager::initializeThunks() {
read16 = function(addr) return GLOBALS.__read16(addr) end,
read32 = function(addr) return GLOBALS.__read32(addr) end,
read64 = function(addr) return GLOBALS.__read64(addr) end,
readFloat = function(addr) return GLOBALS.__readFloat(addr) end,
readDouble = function(addr) return GLOBALS.__readDouble(addr) end,
write8 = function(addr, value) GLOBALS.__write8(addr, value) end,
write16 = function(addr, value) GLOBALS.__write16(addr, value) end,
write32 = function(addr, value) GLOBALS.__write32(addr, value) end,
write64 = function(addr, value) GLOBALS.__write64(addr, value) end,
writeFloat = function(addr, value) GLOBALS.__writeFloat(addr, value) end,
writeDouble = function(addr, value) GLOBALS.__writeDouble(addr, value) end,
getAppID = function()
local ffi = require("ffi")

View file

@ -24,13 +24,16 @@ public class AlberDriver {
public static native void KeyUp(int code);
public static native void SetCirclepadAxis(int x, int y);
public static native void TouchScreenUp();
public static native void TouchScreenDown(int x, int y);
public static native void TouchScreenDown(int x, int y);;
public static native void SetGyro(float roll, float pitch, float yaw);
public static native void SetAccel(float x, float y, float z);
public static native void Pause();
public static native void Resume();
public static native void LoadLuaScript(String script);
public static native byte[] GetSmdh();
public static native void setShaderJitEnabled(boolean enable);
public static native void setAccurateShaderMulEnable(boolean enable);
public static int openDocument(String path, String mode) {
try {

View file

@ -3,11 +3,22 @@ package com.panda3ds.pandroid.app;
import android.app.ActivityManager;
import android.app.PictureInPictureParams;
import android.content.Intent;
import android.content.res.Configuration;
import android.hardware.Sensor;
import android.hardware.SensorEvent;
import android.hardware.SensorEventListener;
import android.hardware.SensorManager;
import android.opengl.Matrix;
import android.os.Build;
import android.os.Bundle;
import android.renderscript.Matrix3f;
import android.renderscript.Matrix4f;
import android.util.Log;
import android.util.Rational;
import android.view.Display;
import android.view.KeyEvent;
import android.view.MotionEvent;
import android.view.Surface;
import android.view.View;
import android.view.ViewGroup;
import android.view.WindowManager;
@ -25,6 +36,7 @@ import com.panda3ds.pandroid.app.game.EmulatorCallback;
import com.panda3ds.pandroid.data.config.GlobalConfig;
import com.panda3ds.pandroid.input.InputHandler;
import com.panda3ds.pandroid.input.InputMap;
import com.panda3ds.pandroid.math.Vector3;
import com.panda3ds.pandroid.utils.Constants;
import com.panda3ds.pandroid.view.PandaGlSurfaceView;
import com.panda3ds.pandroid.view.PandaLayoutController;
@ -32,7 +44,7 @@ import com.panda3ds.pandroid.view.ds.DsLayoutManager;
import com.panda3ds.pandroid.view.renderer.ConsoleRenderer;
import com.panda3ds.pandroid.view.utils.PerformanceView;
public class GameActivity extends BaseActivity implements EmulatorCallback {
public class GameActivity extends BaseActivity implements EmulatorCallback, SensorEventListener {
private final DrawerFragment drawerFragment = new DrawerFragment();
private final AlberInputListener inputListener = new AlberInputListener(this);
private ConsoleRenderer renderer;
@ -74,6 +86,19 @@ public class GameActivity extends BaseActivity implements EmulatorCallback {
((FrameLayout) findViewById(R.id.panda_gl_frame)).addView(view, new FrameLayout.LayoutParams(ViewGroup.LayoutParams.WRAP_CONTENT, ViewGroup.LayoutParams.WRAP_CONTENT));
}
swapScreens(GlobalConfig.get(GlobalConfig.KEY_CURRENT_DS_LAYOUT));
registerSensors();
}
/**
 * Subscribes this activity to the default accelerometer and gyroscope, when the device has them.
 * Samples are delivered to {@link #onSensorChanged(SensorEvent)}.
 */
private void registerSensors() {
    SensorManager sensorManager = (SensorManager) getSystemService(SENSOR_SERVICE);

    // getDefaultSensor returns null when the device lacks that sensor
    Sensor accel = sensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER);
    if (accel != null) {
        // SENSOR_DELAY_GAME has the value 1, which is what was previously passed as a bare literal
        sensorManager.registerListener(this, accel, SensorManager.SENSOR_DELAY_GAME);
    }

    Sensor gyro = sensorManager.getDefaultSensor(Sensor.TYPE_GYROSCOPE);
    if (gyro != null) {
        sensorManager.registerListener(this, gyro, SensorManager.SENSOR_DELAY_GAME);
    }
}
private void changeOverlayVisibility(boolean visible) {
@ -85,7 +110,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback {
@Override
protected void onResume() {
super.onResume();
getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
getWindow().getDecorView().setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN | View.SYSTEM_UI_FLAG_HIDE_NAVIGATION);
getWindow().addFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN);
InputHandler.reset();
@ -94,6 +119,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback {
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O_MR1) {
getTheme().applyStyle(R.style.GameActivityNavigationBar, true);
}
registerSensors();
}
private void enablePIP() {
@ -113,6 +139,7 @@ public class GameActivity extends BaseActivity implements EmulatorCallback {
protected void onPause() {
super.onPause();
((SensorManager)getSystemService(SENSOR_SERVICE)).unregisterListener(this);
InputHandler.reset();
if (GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE)) {
if (Build.VERSION.SDK_INT > Build.VERSION_CODES.O) {
@ -174,10 +201,48 @@ public class GameActivity extends BaseActivity implements EmulatorCallback {
/**
 * Stops sensor delivery to this activity and finalizes the emulator if a ROM is loaded, before
 * handing over to the base class.
 */
@Override
protected void onDestroy() {
    SensorManager sensorManager = (SensorManager) getSystemService(SENSOR_SERVICE);
    sensorManager.unregisterListener(this);

    boolean romLoaded = AlberDriver.HasRomLoaded();
    if (romLoaded) {
        AlberDriver.Finalize();
    }

    super.onDestroy();
}
/**
 * Returns the current display rotation in degrees: 90 for ROTATION_90, 180 for ROTATION_180,
 * -90 for ROTATION_270 and 0 otherwise (including when no decor view or display is available).
 */
private float getDeviceRotationAngle() {
    View decorView = getWindow().getDecorView();
    Display display = (decorView == null) ? null : decorView.getDisplay();
    if (display == null) {
        return 0.0f;
    }

    int rotation = display.getRotation();
    if (rotation == Surface.ROTATION_90) {
        return 90.0f;
    }
    if (rotation == Surface.ROTATION_180) {
        return 180.0f;
    }
    if (rotation == Surface.ROTATION_270) {
        return -90.0f;
    }
    return 0.0f;
}
/**
 * Forwards accelerometer and gyroscope samples to the emulator core while a ROM is loaded.
 * Each sample is first rotated around the Z axis by the current display rotation.
 * Events from any other sensor type are ignored.
 */
@Override
public void onSensorChanged(SensorEvent event) {
    if (!AlberDriver.HasRomLoaded()) {
        return;
    }

    int sensorType = event.sensor.getType();
    boolean isAccelerometer = sensorType == Sensor.TYPE_ACCELEROMETER;
    boolean isGyroscope = sensorType == Sensor.TYPE_GYROSCOPE;
    if (!isAccelerometer && !isGyroscope) {
        return;
    }

    float[] values = event.values;
    Vector3 sample = new Vector3(values[0], values[1], values[2]);
    // Display rotation converted from degrees to radians, applied around Z only
    float angle = (float) (getDeviceRotationAngle() * (Math.PI / 180.0f));
    sample.rotateByEuler(new Vector3(0, 0, angle));

    if (isAccelerometer) {
        AlberDriver.SetAccel(sample.x, sample.y, sample.z);
    } else {
        AlberDriver.SetGyro(sample.x, sample.y, sample.z);
    }
}
// Required by SensorEventListener; accuracy changes are intentionally ignored.
@Override
public void onAccuracyChanged(Sensor sensor, int accuracy) {}
}

View file

@ -26,6 +26,10 @@ public abstract class BasePreferenceFragment extends PreferenceFragmentCompat {
((SwitchPreferenceCompat)findPreference(id)).setChecked(value);
}
/**
 * Sets the summary text of the preference with the given key.
 *
 * @param id   key of the preference to update
 * @param text summary to display
 */
// NOTE(review): no null check on the findPreference result - presumably callers only pass keys that exist; confirm.
protected void setSummaryValue(String id,String text) {
findPreference(id).setSummary(text);
}
protected void setActivityTitle(@StringRes int titleId) {
ActionBar header = ((AppCompatActivity) requireActivity()).getSupportActionBar();
if (header != null) {

View file

@ -22,6 +22,7 @@ public class AdvancedPreferences extends BasePreferenceFragment {
setItemClick("performanceMonitor", pref -> GlobalConfig.set(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY, ((SwitchPreferenceCompat) pref).isChecked()));
setItemClick("shaderJit", pref -> GlobalConfig.set(GlobalConfig.KEY_SHADER_JIT, ((SwitchPreferenceCompat) pref).isChecked()));
setItemClick("accurateShaderMul", pref -> GlobalConfig.set(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY, ((SwitchPreferenceCompat) pref).isChecked()));
setItemClick("loggerService", pref -> {
boolean checked = ((SwitchPreferenceCompat) pref).isChecked();
Context ctx = PandroidApplication.getAppContext();
@ -46,5 +47,6 @@ public class AdvancedPreferences extends BasePreferenceFragment {
((SwitchPreferenceCompat) findPreference("performanceMonitor")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHOW_PERFORMANCE_OVERLAY));
((SwitchPreferenceCompat) findPreference("loggerService")).setChecked(GlobalConfig.get(GlobalConfig.KEY_LOGGER_SERVICE));
((SwitchPreferenceCompat) findPreference("shaderJit")).setChecked(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT));
((SwitchPreferenceCompat) findPreference("accurateShaderMul")).setChecked(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY));
}
}

View file

@ -1,7 +1,13 @@
package com.panda3ds.pandroid.app.preferences;
import android.net.Uri;
import android.os.Bundle;
import android.util.Log;
import android.widget.Toast;
import androidx.activity.result.ActivityResultCallback;
import androidx.activity.result.ActivityResultLauncher;
import androidx.activity.result.contract.ActivityResultContracts;
import androidx.annotation.Nullable;
import androidx.preference.SwitchPreferenceCompat;
@ -10,8 +16,11 @@ import com.panda3ds.pandroid.app.PreferenceActivity;
import com.panda3ds.pandroid.app.base.BasePreferenceFragment;
import com.panda3ds.pandroid.app.preferences.screen_editor.ScreenLayoutsPreference;
import com.panda3ds.pandroid.data.config.GlobalConfig;
import com.panda3ds.pandroid.utils.FileUtils;
public class GeneralPreferences extends BasePreferenceFragment {
public class GeneralPreferences extends BasePreferenceFragment implements ActivityResultCallback<Uri> {
private final ActivityResultContracts.OpenDocument openFolderContract = new ActivityResultContracts.OpenDocument();
private ActivityResultLauncher<String[]> pickFileRequest;
@Override
public void onCreatePreferences(@Nullable Bundle savedInstanceState, @Nullable String rootKey) {
setPreferencesFromResource(R.xml.general_preference, rootKey);
@ -21,6 +30,11 @@ public class GeneralPreferences extends BasePreferenceFragment {
setItemClick("behavior.pictureInPicture", (pref)-> GlobalConfig.set(GlobalConfig.KEY_PICTURE_IN_PICTURE, ((SwitchPreferenceCompat)pref).isChecked()));
setActivityTitle(R.string.general);
refresh();
setItemClick("games.aes_key", pref -> pickFileRequest.launch(new String[]{ "text/plain" }));
setItemClick("games.seed_db", pref -> pickFileRequest.launch(new String[]{ "application/octet-stream" }));
pickFileRequest = registerForActivityResult(openFolderContract, this);
}
@Override
@ -31,5 +45,45 @@ public class GeneralPreferences extends BasePreferenceFragment {
/**
 * Syncs the screen with the stored state: the picture-in-picture switch, and the summaries of the
 * key/SeedDB entries, which say whether each file is present under the private sysdata folder.
 */
private void refresh() {
    setSwitchValue("behavior.pictureInPicture", GlobalConfig.get(GlobalConfig.KEY_PICTURE_IN_PICTURE));

    boolean hasAesKeys = FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/aes_keys.txt");
    int aesKeysTemplate = hasAesKeys ? R.string.file_available : R.string.file_not_available;
    setSummaryValue("games.aes_key", String.format(getString(aesKeysTemplate), "aes_keys.txt"));

    boolean hasSeedDb = FileUtils.exists(FileUtils.getPrivatePath()+"/sysdata/seeddb.bin");
    int seedDbTemplate = hasSeedDb ? R.string.file_available : R.string.file_not_available;
    setSummaryValue("games.seed_db", String.format(getString(seedDbTemplate), "seeddb.bin"));
}
/** Unregisters the file picker launcher, if one is registered, and drops the reference to it. */
@Override
public void onDestroy() {
    super.onDestroy();

    if (pickFileRequest == null) {
        return;
    }

    pickFileRequest.unregister();
    pickFileRequest = null;
}
/**
 * Handles the document returned by the file picker.
 * Only files named "aes_keys.txt" or "seeddb.bin" and smaller than 256 KiB are accepted; they are
 * copied into the private sysdata folder, replacing any existing copy. Anything else shows the
 * "invalid file" toast. A null result is ignored.
 */
@Override
public void onActivityResult(Uri result) {
    // NOTE(review): presumably null means nothing was picked - confirm against the contract
    if (result == null) {
        return;
    }

    String path = result.toString();
    Log.w("File", path + " -> " + FileUtils.getName(path));

    // String.valueOf turns a null name into "null", which matches neither accepted name
    String pickedName = String.valueOf(FileUtils.getName(path));
    boolean isKnownFile = pickedName.equals("aes_keys.txt") || pickedName.equals("seeddb.bin");

    if (!isKnownFile) {
        Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show();
    } else {
        String name = FileUtils.getName(path);
        boolean smallEnough = FileUtils.getLength(path) < 1024 * 256;

        if (!smallEnough) {
            Toast.makeText(getActivity(), R.string.invalid_file, Toast.LENGTH_LONG).show();
        } else {
            String sysdataFolder = FileUtils.getPrivatePath() + "/sysdata";
            if (!FileUtils.exists(sysdataFolder)) {
                FileUtils.createDir(FileUtils.getPrivatePath(), "sysdata");
            }

            // Remove a previously imported copy so the new file replaces it
            String destination = sysdataFolder + "/" + name;
            if (FileUtils.exists(destination)) {
                FileUtils.delete(destination);
            }

            FileUtils.copyFile(path, FileUtils.getPrivatePath() + "/sysdata/", name);
            Toast.makeText(getActivity(), String.format(getString(R.string.file_imported), name), Toast.LENGTH_LONG).show();
        }
    }

    refresh();
}
}

View file

@ -23,7 +23,7 @@ public class ScreenEditorPreference extends Fragment {
@Override
public View onCreateView(@NonNull LayoutInflater inflater, @Nullable ViewGroup container, @Nullable Bundle savedInstanceState) {
layout = new LinearLayout(container.getContext());
layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_HIDE_NAVIGATION|View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE);
layout.setSystemUiVisibility(View.SYSTEM_UI_FLAG_FULLSCREEN|View.SYSTEM_UI_FLAG_IMMERSIVE);
return layout;
}

View file

@ -95,7 +95,7 @@ public class AppDataDocumentProvider extends DocumentsProvider {
private void includeFile(MatrixCursor cursor, File file) {
int flags = 0;
if (file.isDirectory()) {
flags = Document.FLAG_DIR_SUPPORTS_CREATE;
flags = Document.FLAG_DIR_SUPPORTS_CREATE | Document.FLAG_SUPPORTS_DELETE;
} else {
flags = Document.FLAG_SUPPORTS_WRITE | Document.FLAG_SUPPORTS_REMOVE | Document.FLAG_SUPPORTS_DELETE;
}

View file

@ -22,6 +22,7 @@ public class GlobalConfig {
public static DataModel data;
public static final Key<Boolean> KEY_SHADER_JIT = new Key<>("emu.shader_jit", true);
public static final Key<Boolean> KEY_ACCURATE_SHADER_MULTIPLY = new Key<>("emu.accurate_shader_mul", false);
public static final Key<Boolean> KEY_PICTURE_IN_PICTURE = new Key<>("app.behavior.pictureInPicture", false);
public static final Key<Boolean> KEY_SHOW_PERFORMANCE_OVERLAY = new Key<>("dev.performanceOverlay", false);
public static final Key<Boolean> KEY_LOGGER_SERVICE = new Key<>("dev.loggerService", false);

View file

@ -0,0 +1,31 @@
package com.panda3ds.pandroid.math;
/** Mutable quaternion with public x, y, z and w components. */
public class Quaternion {
    public float x;
    public float y;
    public float z;
    public float w;

    /** Creates a quaternion from its four components. */
    public Quaternion(float x, float y, float z, float w) {
        this.x = x;
        this.y = y;
        this.z = z;
        this.w = w;
    }

    /**
     * Overwrites this quaternion with the rotation described by the given Euler angles.
     *
     * @param euler rotation around the x, y and z axes; passed straight to Math.cos / Math.sin,
     *              so the angles are in radians
     * @return this quaternion, for chaining
     */
    public Quaternion fromEuler(Vector3 euler) {
        // The formula works on half angles; dividing by 2.0 promotes the math to double
        double halfX = euler.x / 2.0;
        double halfY = euler.y / 2.0;
        double halfZ = euler.z / 2.0;

        double c1 = Math.cos(halfX);
        double c2 = Math.cos(halfY);
        double c3 = Math.cos(halfZ);

        double s1 = Math.sin(halfX);
        double s2 = Math.sin(halfY);
        double s3 = Math.sin(halfZ);

        this.x = (float) (s1 * c2 * c3 + c1 * s2 * s3);
        this.y = (float) (c1 * s2 * c3 - s1 * c2 * s3);
        this.z = (float) (c1 * c2 * s3 + s1 * s2 * c3);
        this.w = (float) (c1 * c2 * c3 - s1 * s2 * s3);

        return this;
    }
}

View file

@ -0,0 +1,32 @@
package com.panda3ds.pandroid.math;
/** Mutable 3-component float vector that can be rotated in place. */
public class Vector3 {
    // Scratch quaternion reused by rotateByEuler so that rotating does not allocate
    private final Quaternion quaternion = new Quaternion(0, 0, 0, 0);
    public float x, y, z;

    /** Creates a vector from its three components. */
    public Vector3(float x, float y, float z) {
        this.x = x;
        this.y = y;
        this.z = z;
    }

    /**
     * Rotates this vector in place by the rotation described by the given Euler angles.
     *
     * @param euler rotation angles, converted to a quaternion by {@link Quaternion#fromEuler}
     * @return this vector, for chaining
     */
    public Vector3 rotateByEuler(Vector3 euler) {
        this.quaternion.fromEuler(euler);

        float x = this.x, y = this.y, z = this.z;
        float qx = this.quaternion.x;
        float qy = this.quaternion.y;
        float qz = this.quaternion.z;
        float qw = this.quaternion.w;

        // First half of q * v * conj(q): the product q * v, treating v as a quaternion with w = 0
        float ix = qw * x + qy * z - qz * y;
        float iy = qw * y + qz * x - qx * z;
        float iz = qw * z + qx * y - qy * x;
        // Scalar part of q * v is minus the dot product of q's vector part with v
        float iw = -qx * x - qy * y - qz * z;

        // Second half: multiply by the conjugate of q
        this.x = ix * qw + iw * -qx + iy * -qz - iz * -qy;
        this.y = iy * qw + iw * -qy + iz * -qx - ix * -qz;
        this.z = iz * qw + iw * -qz + ix * -qy - iy * -qx;

        return this;
    }
}

View file

@ -230,6 +230,10 @@ public class FileUtils {
return parseFile(path).lastModified();
}
/**
 * Returns the length reported for the file at the given path.
 *
 * @param path path resolved through parseFile
 * @return the value of length() on the resolved file
 */
public static long getLength(String path) {
return parseFile(path).length();
}
public static String[] listFiles(String path) {
DocumentFile folder = parseFile(path);
DocumentFile[] files = folder.listFiles();

View file

@ -93,6 +93,7 @@ public class PandaGlRenderer implements GLSurfaceView.Renderer, ConsoleRenderer
AlberDriver.Initialize();
AlberDriver.setShaderJitEnabled(GlobalConfig.get(GlobalConfig.KEY_SHADER_JIT));
AlberDriver.setAccurateShaderMulEnable(GlobalConfig.get(GlobalConfig.KEY_ACCURATE_SHADER_MULTIPLY));
// If loading the ROM failed, display an error message and early exit
if (!AlberDriver.LoadRom(romPath)) {

View file

@ -90,4 +90,12 @@
<string name="behavior">Comportamento</string>
<string name="invalid_game">Jogo inválido</string>
<string name="tools">Ferramentas</string>
<string name="pref_accurate_shader_title">Multiplicação precisa de shader</string>
<string name="pref_accurate_shader_summary">Usar cálculos mais precisos para shaders</string>
<string name="pref_game_crypto_keys">Importar chaves</string>
<string name="file_available">%s disponível</string>
<string name="file_not_available">%s não disponível</string>
<string name="pref_game_seed_db_keys">Importar SeedDB</string>
<string name="invalid_file">Arquivo inválido</string>
<string name="file_imported">%s Importado</string>
</resources>

View file

@ -96,4 +96,12 @@
<string name="region_taiwan">Taiwan</string>
<string name="behavior">Behavior</string>
<string name="invalid_game">Invalid game</string>
<string name="pref_accurate_shader_title">Accurate shader multiplication</string>
<string name="pref_accurate_shader_summary">Can improve rendering at a small performance loss</string>
<string name="pref_game_crypto_keys">Import keys</string>
<string name="file_imported">%s imported</string>
<string name="file_available">%s available</string>
<string name="file_not_available">%s not available</string>
<string name="pref_game_seed_db_keys">Import SeedDB</string>
<string name="invalid_file">Invalid file</string>
</resources>

View file

@ -28,5 +28,11 @@
app:summary="@string/pref_shader_jit_summary"
app:iconSpaceReserved="false"/>
<SwitchPreferenceCompat
app:key="accurateShaderMul"
app:title="@string/pref_accurate_shader_title"
app:summary="@string/pref_accurate_shader_summary"
app:iconSpaceReserved="false"/>
</PreferenceCategory>
</PreferenceScreen>

View file

@ -23,6 +23,16 @@
app:title="@string/pref_game_folders"
app:summary="@string/pref_game_folders_summary"
app:iconSpaceReserved="false"/>
<Preference
android:key="games.aes_key"
app:title="@string/pref_game_crypto_keys"
app:summary="@string/pref_game_crypto_keys"
app:iconSpaceReserved="false"/>
<Preference
android:key="games.seed_db"
app:title="@string/pref_game_seed_db_keys"
app:summary="@string/pref_game_crypto_keys"
app:iconSpaceReserved="false"/>
</PreferenceCategory>
<PreferenceCategory
app:title="@string/behavior"