mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-11 08:39:48 +12:00
Merge pull request #152 from wheremyfoodat/panda-kith
[WIP] Multithreading fixes
This commit is contained in:
commit
1e7078c28b
11 changed files with 142 additions and 61 deletions
|
@ -91,6 +91,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
void recCMP(const PICAShader& shader, u32 instruction);
|
||||
void recDP3(const PICAShader& shader, u32 instruction);
|
||||
void recDP4(const PICAShader& shader, u32 instruction);
|
||||
void recDPH(const PICAShader& shader, u32 instruction);
|
||||
void recEMIT(const PICAShader& shader, u32 instruction);
|
||||
void recEND(const PICAShader& shader, u32 instruction);
|
||||
void recEX2(const PICAShader& shader, u32 instruction);
|
||||
|
@ -111,7 +112,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
|
|||
void recRSQ(const PICAShader& shader, u32 instruction);
|
||||
void recSETEMIT(const PICAShader& shader, u32 instruction);
|
||||
void recSGE(const PICAShader& shader, u32 instruction);
|
||||
void recSGEI(const PICAShader& shader, u32 instruction);
|
||||
void recSLT(const PICAShader& shader, u32 instruction);
|
||||
|
||||
MAKE_LOG_FUNCTION(log, shaderJITLogger)
|
||||
|
|
|
@ -23,6 +23,7 @@ namespace ShaderOpcodes {
|
|||
LG2 = 0x06,
|
||||
LIT = 0x07,
|
||||
MUL = 0x08,
|
||||
SGE = 0x09,
|
||||
SLT = 0x0A,
|
||||
FLR = 0x0B,
|
||||
MAX = 0x0C,
|
||||
|
|
|
@ -52,6 +52,9 @@ class Kernel {
|
|||
// Top 8 bits are the major version, bottom 8 are the minor version
|
||||
u16 kernelVersion = 0;
|
||||
|
||||
// Shows whether a reschedule will be needed
|
||||
bool needReschedule = false;
|
||||
|
||||
Handle makeArbiter();
|
||||
Handle makeProcess(u32 id);
|
||||
Handle makePort(const char* name);
|
||||
|
@ -73,7 +76,6 @@ private:
|
|||
void switchThread(int newThreadIndex);
|
||||
void sortThreads();
|
||||
std::optional<int> getNextThread();
|
||||
void switchToNextThread();
|
||||
void rescheduleThreads();
|
||||
bool canThreadRun(const Thread& t);
|
||||
bool shouldWaitOnObject(KernelObject* object);
|
||||
|
@ -168,6 +170,15 @@ public:
|
|||
void serviceSVC(u32 svc);
|
||||
void reset();
|
||||
|
||||
// Marks a reschedule as pending by setting needReschedule; the reschedule itself is carried out later by evalReschedule()
void requireReschedule() { needReschedule = true; }
|
||||
|
||||
// Performs a pending reschedule, if one was requested via requireReschedule().
// Does nothing when needReschedule is not set.
void evalReschedule() {
|
||||
if (needReschedule) {
|
||||
// Clear the pending flag, then let rescheduleThreads() pick the thread to run
needReschedule = false;
|
||||
rescheduleThreads();
|
||||
}
|
||||
}
|
||||
|
||||
Handle makeObject(KernelObjectType type) {
|
||||
if (handleCounter > KernelHandles::Max) [[unlikely]] {
|
||||
Helpers::panic("Hlep we somehow created enough kernel objects to overflow this thing");
|
||||
|
|
|
@ -143,6 +143,7 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
|||
break;
|
||||
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::DPH: recDPH(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
|
||||
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
|
||||
|
@ -179,6 +180,10 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
|
|||
case ShaderOpcodes::SLTI:
|
||||
recSLT(shaderUnit, instruction); break;
|
||||
|
||||
case ShaderOpcodes::SGE:
|
||||
case ShaderOpcodes::SGEI:
|
||||
recSGE(shaderUnit, instruction); break;
|
||||
|
||||
default:
|
||||
Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode);
|
||||
}
|
||||
|
@ -525,6 +530,30 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
|
|||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
// Emits x64 code for the PICA DPH instruction: a 4-component dot product of src1 and src2
// where the w component of src1 is first replaced with 1.0. The result is written to dest through storeRegister.
// Operand fields are decoded from the instruction word; the operand descriptor is looked up with the low 7 bits.
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
|
||||
// Only src1 is loaded with the index field; src2 is loaded with index 0
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
|
||||
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
|
||||
|
||||
// Attach 1.0 to the w component of src1
// (presumably onesVector holds 1.0 in every lane -- confirm against its definition)
|
||||
if (haveSSE4_1) {
|
||||
// Blend mask 0b1000: take only lane 3 (w) from onesVector, keep x/y/z from src1
blendps(src1_xmm, xword[rip + onesVector], 0b1000);
|
||||
} else {
|
||||
// Fallback without blendps:
// scratch1 = src1, then unpckhps interleaves the high halves -> scratch1 = (z, one, w, one)
movaps(scratch1, src1_xmm);
|
||||
unpckhps(scratch1, xword[rip + onesVector]);
|
||||
// unpcklpd keeps the low 64 bits of src1 (x, y) and appends the low 64 bits of scratch1 (z, one)
unpcklpd(src1_xmm, scratch1);
|
||||
}
|
||||
|
||||
// NOTE(review): dpps is itself an SSE4.1 instruction and is emitted on both paths above -- confirm whether the non-SSE4.1 fallback can actually be reached
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of src1_xmm similarly to PICA
|
||||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
void ShaderEmitter::recMAX(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
|
@ -656,6 +685,24 @@ void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) {
|
|||
storeRegister(src1_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
// Emits x64 code for the PICA SGE and SGEI instructions (both are dispatched here by compileInstruction).
// Per lane, the comparison src1 >= src2 produces an all-ones or all-zeroes mask, which is then ANDed with onesVector
// before being written to dest (presumably giving 1.0 / 0.0 per lane -- confirm against the definition of onesVector).
void ShaderEmitter::recSGE(const PICAShader& shader, u32 instruction) {
|
||||
// The opcode is taken from the bits above bit 26; SGEI uses a different operand layout than SGE
const bool isSGEI = (instruction >> 26) == ShaderOpcodes::SGEI;
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
|
||||
// SGE: src1 is the 7-bit field at bit 12, src2 the 5-bit field at bit 7
// SGEI: src1 is the 5-bit field at bit 14, src2 the 7-bit field at bit 7
const u32 src1 = isSGEI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
|
||||
const u32 src2 = isSGEI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
|
||||
const u32 idx = getBits<19, 2>(instruction);
|
||||
const u32 dest = getBits<21, 5>(instruction);
|
||||
|
||||
// The index field is passed along with whichever source has the 7-bit encoding: src1 for SGE, src2 for SGEI
loadRegister<1>(src1_xmm, shader, src1, isSGEI ? 0 : idx, operandDescriptor);
|
||||
loadRegister<2>(src2_xmm, shader, src2, isSGEI ? idx : 0, operandDescriptor);
|
||||
|
||||
// SSE does not have a cmpgeps instruction so we turn src1 >= src2 to src2 <= src1, result in src2
|
||||
cmpleps(src2_xmm, src1_xmm);
|
||||
// Turn the all-ones comparison mask into the bit pattern stored in onesVector; lanes that failed the comparison stay 0
andps(src2_xmm, xword[rip + onesVector]);
|
||||
storeRegister(src2_xmm, shader, dest, operandDescriptor);
|
||||
}
|
||||
|
||||
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
|
||||
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
|
||||
const u32 src1 = getBits<12, 7>(instruction);
|
||||
|
|
|
@ -87,7 +87,7 @@ void Kernel::arbitrateAddress() {
|
|||
Helpers::panic("ArbitrateAddress: Unimplemented type %s", arbitrationTypeToString(type));
|
||||
}
|
||||
|
||||
rescheduleThreads();
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
// Signal up to "threadCount" threads waiting on the arbiter indicated by "waitingAddress"
|
||||
|
|
|
@ -35,22 +35,15 @@ bool Kernel::signalEvent(Handle handle) {
|
|||
|
||||
// Check if there's any thread waiting on this event
|
||||
if (event->waitlist != 0) {
|
||||
// One-shot events get cleared once they are acquired by some thread and only wake up 1 thread at a time
|
||||
wakeupAllThreads(event->waitlist, handle);
|
||||
event->waitlist = 0; // No threads waiting;
|
||||
|
||||
if (event->resetType == ResetType::OneShot) {
|
||||
int index = wakeupOneThread(event->waitlist, handle); // Wake up one thread with the highest priority
|
||||
event->waitlist ^= (1ull << index); // Remove thread from waitlist
|
||||
event->fired = false;
|
||||
} else {
|
||||
wakeupAllThreads(event->waitlist, handle);
|
||||
event->waitlist = 0; // No threads waiting;
|
||||
}
|
||||
|
||||
// We must reschedule our threads if we signalled one. Some games such as FE: Awakening rely on this
|
||||
// If this does not happen, we can have phenomena such as a thread waking up a higher priority thread,
|
||||
// and the higher priority thread just never running
|
||||
rescheduleThreads();
|
||||
}
|
||||
|
||||
|
||||
rescheduleThreads();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -121,7 +114,6 @@ void Kernel::waitSynchronization1() {
|
|||
if (!shouldWaitOnObject(object)) {
|
||||
acquireSyncObject(object, threads[currentThreadIndex]); // Acquire the object since it's ready
|
||||
regs[0] = Result::Success;
|
||||
rescheduleThreads();
|
||||
} else {
|
||||
// Timeout is 0, don't bother waiting, instantly timeout
|
||||
if (ns == 0) {
|
||||
|
@ -141,7 +133,7 @@ void Kernel::waitSynchronization1() {
|
|||
// Add the current thread to the object's wait list
|
||||
object->getWaitlist() |= (1ull << currentThreadIndex);
|
||||
|
||||
switchToNextThread();
|
||||
requireReschedule();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,14 +196,13 @@ void Kernel::waitSynchronizationN() {
|
|||
|
||||
auto& t = threads[currentThreadIndex];
|
||||
|
||||
// We only need to wait on one object. Easy...?!
|
||||
// We only need to wait on one object. Easy.
|
||||
if (!waitAll) {
|
||||
// If there's ready objects, acquire the first one and return
|
||||
if (oneObjectReady) {
|
||||
regs[0] = Result::Success;
|
||||
regs[1] = firstReadyObjectIndex; // Return index of the acquired object
|
||||
acquireSyncObject(waitObjects[firstReadyObjectIndex].second, t); // Acquire object
|
||||
rescheduleThreads();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -229,8 +220,8 @@ void Kernel::waitSynchronizationN() {
|
|||
waitObjects[i].second->getWaitlist() |= (1ull << currentThreadIndex); // And add the thread to the object's waitlist
|
||||
}
|
||||
|
||||
switchToNextThread();
|
||||
requireReschedule();
|
||||
} else {
|
||||
Helpers::panic("WaitSynchronizatioN with waitAll");
|
||||
Helpers::panic("WaitSynchronizationN with waitAll");
|
||||
}
|
||||
}
|
|
@ -61,6 +61,8 @@ void Kernel::serviceSVC(u32 svc) {
|
|||
case 0x3D: outputDebugString(); break;
|
||||
default: Helpers::panic("Unimplemented svc: %X @ %08X", svc, regs[15]); break;
|
||||
}
|
||||
|
||||
evalReschedule();
|
||||
}
|
||||
|
||||
void Kernel::setVersion(u8 major, u8 minor) {
|
||||
|
@ -140,6 +142,8 @@ void Kernel::reset() {
|
|||
threadIndices.clear();
|
||||
serviceManager.reset();
|
||||
|
||||
needReschedule = false;
|
||||
|
||||
// Allocate handle #0 to a dummy object and make a main process object
|
||||
makeObject(KernelObjectType::Dummy);
|
||||
currentProcess = makeProcess(1); // Use ID = 1 for main process
|
||||
|
|
|
@ -76,6 +76,11 @@ void Kernel::sendSyncRequest() {
|
|||
u32 messagePointer = getTLSPointer() + 0x80; // The message is stored starting at TLS+0x80
|
||||
logSVC("SendSyncRequest(session handle = %X)\n", handle);
|
||||
|
||||
// Service calls via SendSyncRequest and file access need to put the caller to sleep for a given amount of time
|
||||
// To make sure that the other threads don't get starved. Various games rely on this (including Sonic Boom: Shattering Crystal it seems)
|
||||
constexpr u64 syncRequestDelayNs = 39000;
|
||||
sleepThread(syncRequestDelayNs);
|
||||
|
||||
// The sync request is being sent at a service rather than whatever port, so have the service manager intercept it
|
||||
if (KernelHandles::isServiceHandle(handle)) {
|
||||
// The service call might cause a reschedule and change threads. Hence, set r0 before executing the service call
|
||||
|
|
|
@ -82,32 +82,26 @@ std::optional<int> Kernel::getNextThread() {
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
void Kernel::switchToNextThread() {
|
||||
std::optional<int> newThreadIndex = getNextThread();
|
||||
|
||||
if (!newThreadIndex.has_value()) {
|
||||
log("Kernel tried to switch to the next thread but none found. Switching to random thread\n");
|
||||
assert(aliveThreadCount != 0);
|
||||
Helpers::panic("rpog");
|
||||
|
||||
int index;
|
||||
do {
|
||||
index = rand() % threadCount;
|
||||
} while (threads[index].status == ThreadStatus::Dead); // TODO: Pray this doesn't hang
|
||||
|
||||
switchThread(index);
|
||||
} else {
|
||||
switchThread(newThreadIndex.value());
|
||||
}
|
||||
}
|
||||
|
||||
// See if there;s a higher priority, ready thread and switch to that
|
||||
// See if there is a higher priority, ready thread and switch to that
|
||||
void Kernel::rescheduleThreads() {
|
||||
Thread& current = threads[currentThreadIndex]; // Current running thread
|
||||
|
||||
// If the current thread is running and hasn't gone to sleep or whatever, set it to Ready instead of Running
|
||||
// So that getNextThread will evaluate it properly
|
||||
if (current.status == ThreadStatus::Running) {
|
||||
current.status = ThreadStatus::Ready;
|
||||
}
|
||||
ThreadStatus currentStatus = current.status;
|
||||
std::optional<int> newThreadIndex = getNextThread();
|
||||
|
||||
if (newThreadIndex.has_value() && newThreadIndex.value() != currentThreadIndex) {
|
||||
threads[currentThreadIndex].status = ThreadStatus::Ready;
|
||||
// Case 1: A thread can run
|
||||
if (newThreadIndex.has_value()) {
|
||||
switchThread(newThreadIndex.value());
|
||||
}
|
||||
|
||||
// Case 2: No other thread can run, straight to the idle thread
|
||||
else {
|
||||
switchThread(idleThreadIndex);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,6 +178,7 @@ void Kernel::releaseMutex(Mutex* moo) {
|
|||
// If the lock count reached 0 then the thread no longer owns the mootex and it can be given to a new one
|
||||
if (moo->lockCount == 0) {
|
||||
moo->locked = false;
|
||||
|
||||
if (moo->waitlist != 0) {
|
||||
int index = wakeupOneThread(moo->waitlist, moo->handle); // Wake up one thread and get its index
|
||||
moo->waitlist ^= (1ull << index); // Remove thread from waitlist
|
||||
|
@ -194,7 +189,7 @@ void Kernel::releaseMutex(Mutex* moo) {
|
|||
moo->ownerThread = index;
|
||||
}
|
||||
|
||||
rescheduleThreads();
|
||||
requireReschedule();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -210,7 +205,7 @@ void Kernel::sleepThreadOnArbiter(u32 waitingAddress) {
|
|||
t.status = ThreadStatus::WaitArbiter;
|
||||
t.waitingAddress = waitingAddress;
|
||||
|
||||
switchToNextThread();
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
// Acquires an object that is **ready to be acquired** without waiting on it
|
||||
|
@ -226,7 +221,13 @@ void Kernel::acquireSyncObject(KernelObject* object, const Thread& thread) {
|
|||
|
||||
case KernelObjectType::Mutex: {
|
||||
Mutex* moo = object->getData<Mutex>();
|
||||
moo->locked = true; // Set locked to true, whether it's false or not because who cares
|
||||
|
||||
// Only reschedule if we're acquiring the mutex for the first time
|
||||
if (!moo->locked) {
|
||||
moo->locked = true;
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
// Increment lock count by 1. If a thread acquires a mootex multiple times, it needs to release it until count == 0
|
||||
// For the mootex to be free.
|
||||
moo->lockCount++;
|
||||
|
@ -338,20 +339,31 @@ void Kernel::wakeupAllThreads(u64 waitlist, Handle handle) {
|
|||
void Kernel::sleepThread(s64 ns) {
|
||||
if (ns < 0) {
|
||||
Helpers::panic("Sleeping a thread for a negative amount of ns");
|
||||
} else if (ns == 0) { // Used when we want to force a thread switch
|
||||
std::optional<int> newThreadIndex = getNextThread();
|
||||
// If there's no other thread waiting, don't bother yielding
|
||||
if (newThreadIndex.has_value()) {
|
||||
threads[currentThreadIndex].status = ThreadStatus::Ready;
|
||||
switchThread(newThreadIndex.value());
|
||||
}
|
||||
} else { // If we're sleeping for > 0 ns
|
||||
} else if (ns == 0) {
|
||||
// TODO: This is garbage, but it works so eh we can keep it for now
|
||||
Thread& t = threads[currentThreadIndex];
|
||||
|
||||
// See if a thread other than this and the idle thread is waiting to run by temp marking the current thread as dead and searching
|
||||
// If there is another thread to run, then run it. Otherwise, go back to this thread, not to the idle thread
|
||||
t.status = ThreadStatus::Dead;
|
||||
auto nextThreadIndex = getNextThread();
|
||||
t.status = ThreadStatus::Ready;
|
||||
|
||||
if (nextThreadIndex.has_value()) {
|
||||
const auto index = nextThreadIndex.value();
|
||||
|
||||
if (index != idleThreadIndex) {
|
||||
switchThread(index);
|
||||
}
|
||||
}
|
||||
} else { // If we're sleeping for >= 0 ns
|
||||
Thread& t = threads[currentThreadIndex];
|
||||
|
||||
t.status = ThreadStatus::WaitSleep;
|
||||
t.waitingNanoseconds = ns;
|
||||
t.sleepTick = cpu.getTicks();
|
||||
|
||||
switchToNextThread();
|
||||
requireReschedule();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -374,7 +386,7 @@ void Kernel::createThread() {
|
|||
|
||||
regs[0] = Result::Success;
|
||||
regs[1] = makeThread(entrypoint, initialSP, priority, id, arg, ThreadStatus::Ready);
|
||||
rescheduleThreads();
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
// void SleepThread(s64 nanoseconds)
|
||||
|
@ -448,7 +460,7 @@ void Kernel::setThreadPriority() {
|
|||
}
|
||||
}
|
||||
sortThreads();
|
||||
rescheduleThreads();
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
void Kernel::exitThread() {
|
||||
|
@ -472,7 +484,7 @@ void Kernel::exitThread() {
|
|||
t.threadsWaitingForTermination = 0; // No other threads waiting
|
||||
}
|
||||
|
||||
switchToNextThread();
|
||||
requireReschedule();
|
||||
}
|
||||
|
||||
void Kernel::svcCreateMutex() {
|
||||
|
|
|
@ -357,6 +357,8 @@ void Emulator::run() {
|
|||
|
||||
hid.updateInputs(cpu.getTicks());
|
||||
}
|
||||
// TODO: Should this be uncommented?
|
||||
// kernel.evalReschedule();
|
||||
|
||||
// Update inputs in the HID module
|
||||
SDL_GL_SwapWindow(window);
|
||||
|
|
|
@ -228,10 +228,18 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
|
|||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
|
||||
));
|
||||
|
||||
// Positional Light
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) error_unimpl = true;
|
||||
vec3 half_vector;
|
||||
|
||||
vec3 half_vector = normalize(normalize(light_vector) + view);
|
||||
// Positional Light
|
||||
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) {
|
||||
error_unimpl = true;
|
||||
// half_vector = normalize(normalize(light_vector + v_view) + view);
|
||||
}
|
||||
|
||||
// Directional light
|
||||
else {
|
||||
half_vector = normalize(normalize(light_vector) + view);
|
||||
}
|
||||
|
||||
for (int c = 0; c < 7; c++) {
|
||||
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0) {
|
||||
|
|
Loading…
Add table
Reference in a new issue