Merge branch 'master' into specialized-shaders-2

This commit is contained in:
wheremyfoodat 2024-07-14 23:57:12 +03:00
commit c73758959b
28 changed files with 5204 additions and 67 deletions

View file

@ -144,8 +144,8 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
case ShaderOpcodes::CMP2: recCMP(shaderUnit, instruction); break;
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
// case ShaderOpcodes::DPH:
// case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::DPH:
case ShaderOpcodes::DPHI: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
@ -533,6 +533,39 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
storeRegister(src1Vec, shader, dest, operandDescriptor);
}
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
const bool isDPHI = (instruction >> 26) == ShaderOpcodes::DPHI;
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = isDPHI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
const u32 src2 = isDPHI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
const u32 writeMask = getBits<0, 4>(operandDescriptor);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1Vec, shader, src1, isDPHI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2Vec, shader, src2, isDPHI ? idx : 0, operandDescriptor);
// // Attach 1.0 to the w component of src1
MOV(src1Vec.Selem()[3], onesVector.Selem()[0]);
// Now perform a DP4
// Do a piecewise multiplication of the vectors first
if constexpr (useSafeMUL) {
emitSafeMUL(src1Vec, src2Vec, scratch1Vec);
} else {
FMUL(src1Vec.S4(), src1Vec.S4(), src2Vec.S4());
}
FADDP(src1Vec.S4(), src1Vec.S4(), src1Vec.S4()); // Now add the adjacent components together
FADDP(src1Vec.toS(), src1Vec.toD().S2()); // Again for the bottom 2 lanes. Now the bottom lane contains the dot product
if (writeMask != 0x8) { // Copy bottom lane to all lanes if we're not simply writing back x
DUP(src1Vec.S4(), src1Vec.Selem()[0]); // src1Vec = src1Vec.xxxx
}
storeRegister(src1Vec, shader, dest, operandDescriptor);
}
oaknut::Label ShaderEmitter::emitLog2Func() {
oaknut::Label funcStart;

View file

@ -355,7 +355,7 @@ namespace Audio {
}
switch (buffer.format) {
case SampleFormat::PCM8: Helpers::warn("Unimplemented sample format!"); break;
case SampleFormat::PCM8: source.currentSamples = decodePCM8(data, buffer.sampleCount, source); break;
case SampleFormat::PCM16: source.currentSamples = decodePCM16(data, buffer.sampleCount, source); break;
case SampleFormat::ADPCM: source.currentSamples = decodeADPCM(data, buffer.sampleCount, source); break;
@ -406,6 +406,26 @@ namespace Audio {
}
}
HLE_DSP::SampleBuffer HLE_DSP::decodePCM8(const u8* data, usize sampleCount, Source& source) {
SampleBuffer decodedSamples(sampleCount);
if (source.sourceType == SourceType::Stereo) {
for (usize i = 0; i < sampleCount; i++) {
const s16 left = s16(u16(*data++) << 8);
const s16 right = s16(u16(*data++) << 8);
decodedSamples[i] = {left, right};
}
} else {
// Mono
for (usize i = 0; i < sampleCount; i++) {
const s16 sample = s16(u16(*data++) << 8);
decodedSamples[i] = {sample, sample};
}
}
return decodedSamples;
}
HLE_DSP::SampleBuffer HLE_DSP::decodePCM16(const u8* data, usize sampleCount, Source& source) {
SampleBuffer decodedSamples(sampleCount);
const s16* data16 = reinterpret_cast<const s16*>(data);

View file

@ -39,7 +39,35 @@ HorizonResult SDMCArchive::createFile(const FSPath& path, u64 size) {
}
HorizonResult SDMCArchive::deleteFile(const FSPath& path) {
Helpers::panic("[SDMC] Unimplemented DeleteFile");
if (path.type == PathType::UTF16) {
if (!isPathSafe<PathType::UTF16>(path)) {
Helpers::panic("Unsafe path in SDMC::DeleteFile");
}
fs::path p = IOFile::getAppData() / "SDMC";
p += fs::path(path.utf16_string).make_preferred();
if (fs::is_directory(p)) {
Helpers::panic("SDMC::DeleteFile: Tried to delete directory");
}
if (!fs::is_regular_file(p)) {
return Result::FS::FileNotFoundAlt;
}
std::error_code ec;
bool success = fs::remove(p, ec);
// It might still be possible for fs::remove to fail, if there's eg an open handle to a file being deleted
// In this case, print a warning, but still return success for now
if (!success) {
Helpers::warn("SDMC::DeleteFile: fs::remove failed\n");
}
return Result::Success;
}
Helpers::panic("SDMCArchive::DeleteFile: Unknown path type");
return Result::Success;
}
@ -145,7 +173,7 @@ Rust::Result<DirectorySession, HorizonResult> SDMCArchive::openDirectory(const F
if (path.type == PathType::UTF16) {
if (!isPathSafe<PathType::UTF16>(path)) {
Helpers::panic("Unsafe path in SaveData::OpenDirectory");
Helpers::panic("Unsafe path in SDMC::OpenDirectory");
}
fs::path p = IOFile::getAppData() / "SDMC";

View file

@ -184,7 +184,8 @@ void Kernel::setFileSize(u32 messagePointer, Handle fileHandle) {
if (success) {
mem.write32(messagePointer + 4, Result::Success);
} else {
Helpers::panic("FileOp::SetFileSize failed");
Helpers::warn("FileOp::SetFileSize failed");
mem.write32(messagePointer + 4, Result::FailurePlaceholder);
}
} else {
Helpers::panic("Tried to set file size of file without file descriptor");

View file

@ -399,3 +399,5 @@ std::string Kernel::getProcessName(u32 pid) {
Helpers::panic("Attempted to name non-current process");
}
}
Scheduler& Kernel::getScheduler() { return cpu.getScheduler(); }

View file

@ -29,6 +29,9 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
smdh.clear();
partitionInfo = info;
primaryKey = {};
secondaryKey = {};
size = u64(*(u32*)&header[0x104]) * mediaUnit; // TODO: Maybe don't type pun because big endian will break
exheaderSize = *(u32*)&header[0x180];
@ -78,11 +81,11 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
if (!primaryResult.first || !secondaryResult.first) {
gotCryptoKeys = false;
} else {
Crypto::AESKey primaryKey = primaryResult.second;
Crypto::AESKey secondaryKey = secondaryResult.second;
primaryKey = primaryResult.second;
secondaryKey = secondaryResult.second;
EncryptionInfo encryptionInfoTmp;
encryptionInfoTmp.normalKey = primaryKey;
encryptionInfoTmp.normalKey = *primaryKey;
encryptionInfoTmp.initialCounter.fill(0);
for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
@ -94,7 +97,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
encryptionInfoTmp.initialCounter[8] = 2;
exeFS.encryptionInfo = encryptionInfoTmp;
encryptionInfoTmp.normalKey = secondaryKey;
encryptionInfoTmp.normalKey = *secondaryKey;
encryptionInfoTmp.initialCounter[8] = 3;
romFS.encryptionInfo = encryptionInfoTmp;
}
@ -201,13 +204,20 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
Helpers::panic("Second code file in a single NCCH partition. What should this do?\n");
}
// All files in ExeFS use the same IV, though .code uses the secondary key for decryption
// whereas .icon/.banner use the primary key.
FSInfo info = exeFS;
if (encrypted && secondaryKey.has_value() && info.encryptionInfo.has_value()) {
info.encryptionInfo->normalKey = *secondaryKey;
}
if (compressCode) {
std::vector<u8> tmp;
tmp.resize(fileSize);
// A file offset of 0 means our file is located right after the ExeFS header
// So in the ROM, files are located at (file offset + exeFS offset + exeFS header size)
readFromFile(file, exeFS, tmp.data(), fileOffset + exeFSHeaderSize, fileSize);
readFromFile(file, info, tmp.data(), fileOffset + exeFSHeaderSize, fileSize);
// Decompress .code file from the tmp vector to the "code" vector
if (!CartLZ77::decompress(codeFile, tmp)) {
@ -216,7 +226,7 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
}
} else {
codeFile.resize(fileSize);
readFromFile(file, exeFS, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize);
readFromFile(file, info, codeFile.data(), fileOffset + exeFSHeaderSize, fileSize);
}
} else if (std::strcmp(name, "icon") == 0) {
// Parse icon file to extract region info and more in the future (logo, etc)

View file

@ -61,24 +61,7 @@ void RendererGL::initGraphicsContextInternal() {
OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex);
OpenGL::Shader frag({fragmentShaderSource.begin(), fragmentShaderSource.size()}, OpenGL::Fragment);
triangleProgram.create({vert, frag});
gl.useProgram(triangleProgram);
ubershaderData.textureEnvSourceLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvSource");
ubershaderData.textureEnvOperandLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvOperand");
ubershaderData.textureEnvCombinerLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvCombiner");
ubershaderData.textureEnvColorLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvColor");
ubershaderData.textureEnvScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_textureEnvScale");
ubershaderData.depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
ubershaderData.depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
ubershaderData.depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
ubershaderData.picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
initUbershader(triangleProgram);
auto displayVertexShaderSource = gl_resources.open("opengl_display.vert");
auto displayFragmentShaderSource = gl_resources.open("opengl_display.frag");
@ -870,4 +853,47 @@ void RendererGL::deinitGraphicsContext() {
// All other GL objects should be invalidated automatically and be recreated by the next call to initGraphicsContext
// TODO: Make it so that depth and colour buffers get written back to 3DS memory
printf("RendererGL::DeinitGraphicsContext called\n");
}
}
std::string RendererGL::getUbershader() {
auto gl_resources = cmrc::RendererGL::get_filesystem();
auto fragmentShader = gl_resources.open("opengl_fragment_shader.frag");
return std::string(fragmentShader.begin(), fragmentShader.end());
}
void RendererGL::setUbershader(const std::string& shader) {
auto gl_resources = cmrc::RendererGL::get_filesystem();
auto vertexShaderSource = gl_resources.open("opengl_vertex_shader.vert");
OpenGL::Shader vert({vertexShaderSource.begin(), vertexShaderSource.size()}, OpenGL::Vertex);
OpenGL::Shader frag(shader, OpenGL::Fragment);
triangleProgram.create({vert, frag});
initUbershader(triangleProgram);
glUniform1f(depthScaleLoc, oldDepthScale);
glUniform1f(depthOffsetLoc, oldDepthOffset);
glUniform1i(depthmapEnableLoc, oldDepthmapEnable);
}
void RendererGL::initUbershader(OpenGL::Program& program) {
gl.useProgram(program);
textureEnvSourceLoc = OpenGL::uniformLocation(program, "u_textureEnvSource");
textureEnvOperandLoc = OpenGL::uniformLocation(program, "u_textureEnvOperand");
textureEnvCombinerLoc = OpenGL::uniformLocation(program, "u_textureEnvCombiner");
textureEnvColorLoc = OpenGL::uniformLocation(program, "u_textureEnvColor");
textureEnvScaleLoc = OpenGL::uniformLocation(program, "u_textureEnvScale");
depthScaleLoc = OpenGL::uniformLocation(program, "u_depthScale");
depthOffsetLoc = OpenGL::uniformLocation(program, "u_depthOffset");
depthmapEnableLoc = OpenGL::uniformLocation(program, "u_depthmapEnable");
picaRegLoc = OpenGL::uniformLocation(program, "u_picaRegs");
// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
glUniform1i(OpenGL::uniformLocation(program, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(program, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(program, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(program, "u_tex_lighting_lut"), 3);
}

View file

@ -61,6 +61,7 @@ void Y2RService::reset() {
inputLineWidth = 420;
conversionCoefficients.fill(0);
isBusy = false;
}
void Y2RService::handleSyncRequest(u32 messagePointer) {
@ -156,6 +157,11 @@ void Y2RService::setTransferEndInterrupt(u32 messagePointer) {
void Y2RService::stopConversion(u32 messagePointer) {
log("Y2R::StopConversion\n");
if (isBusy) {
isBusy = false;
kernel.getScheduler().removeEvent(Scheduler::EventType::SignalY2R);
}
mem.write32(messagePointer, IPC::responseHeader(0x27, 1, 0));
mem.write32(messagePointer + 4, Result::Success);
}
@ -167,7 +173,7 @@ void Y2RService::isBusyConversion(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x28, 2, 0));
mem.write32(messagePointer + 4, Result::Success);
mem.write32(messagePointer + 8, static_cast<u32>(BusyStatus::NotBusy));
mem.write32(messagePointer + 8, static_cast<u32>(isBusy ? BusyStatus::Busy : BusyStatus::NotBusy));
}
void Y2RService::setBlockAlignment(u32 messagePointer) {
@ -434,11 +440,15 @@ void Y2RService::startConversion(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x26, 1, 0));
mem.write32(messagePointer + 4, Result::Success);
// Make Y2R conversion end instantly.
// Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt?
if (transferEndEvent.has_value()) {
kernel.signalEvent(transferEndEvent.value());
}
// Schedule Y2R conversion end event.
// The tick value is tweaked based on the minimum delay needed to get FIFA 15 to not hang due to a race condition on its title screen
static constexpr u64 delayTicks = 1'350'000;
isBusy = true;
// Remove any potential pending Y2R event and schedule a new one
Scheduler& scheduler = kernel.getScheduler();
scheduler.removeEvent(Scheduler::EventType::SignalY2R);
scheduler.addEvent(Scheduler::EventType::SignalY2R, scheduler.currentTimestamp + delayTicks);
}
void Y2RService::isFinishedSendingYUV(u32 messagePointer) {
@ -484,4 +494,15 @@ void Y2RService::isFinishedReceiving(u32 messagePointer) {
mem.write32(messagePointer, IPC::responseHeader(0x17, 2, 0));
mem.write32(messagePointer + 4, Result::Success);
mem.write32(messagePointer + 8, finished ? 1 : 0);
}
void Y2RService::signalConversionDone() {
if (isBusy) {
isBusy = false;
// Signal the transfer end event if it's been created. TODO: Is this affected by SetTransferEndInterrupt?
if (transferEndEvent.has_value()) {
kernel.signalEvent(transferEndEvent.value());
}
}
}