Merge pull request #152 from wheremyfoodat/panda-kith

[WIP] Multithreading fixes
This commit is contained in:
wheremyfoodat 2023-08-02 20:21:18 +03:00 committed by GitHub
commit 1e7078c28b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 142 additions and 61 deletions

View file

@ -91,6 +91,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recCMP(const PICAShader& shader, u32 instruction);
void recDP3(const PICAShader& shader, u32 instruction);
void recDP4(const PICAShader& shader, u32 instruction);
void recDPH(const PICAShader& shader, u32 instruction);
void recEMIT(const PICAShader& shader, u32 instruction);
void recEND(const PICAShader& shader, u32 instruction);
void recEX2(const PICAShader& shader, u32 instruction);
@ -111,7 +112,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recRSQ(const PICAShader& shader, u32 instruction);
void recSETEMIT(const PICAShader& shader, u32 instruction);
void recSGE(const PICAShader& shader, u32 instruction);
void recSGEI(const PICAShader& shader, u32 instruction);
void recSLT(const PICAShader& shader, u32 instruction);
MAKE_LOG_FUNCTION(log, shaderJITLogger)

View file

@ -23,6 +23,7 @@ namespace ShaderOpcodes {
LG2 = 0x06,
LIT = 0x07,
MUL = 0x08,
SGE = 0x09,
SLT = 0x0A,
FLR = 0x0B,
MAX = 0x0C,

View file

@ -52,6 +52,9 @@ class Kernel {
// Top 8 bits are the major version, bottom 8 are the minor version
u16 kernelVersion = 0;
// Shows whether a reschedule will be needed
bool needReschedule = false;
Handle makeArbiter();
Handle makeProcess(u32 id);
Handle makePort(const char* name);
@ -73,7 +76,6 @@ private:
void switchThread(int newThreadIndex);
void sortThreads();
std::optional<int> getNextThread();
void switchToNextThread();
void rescheduleThreads();
bool canThreadRun(const Thread& t);
bool shouldWaitOnObject(KernelObject* object);
@ -168,6 +170,15 @@ public:
void serviceSVC(u32 svc);
void reset();
// Marks a reschedule as pending. The flag is consumed (and cleared) by evalReschedule().
void requireReschedule() {
    needReschedule = true;
}
// Runs the thread scheduler if a reschedule has been requested via requireReschedule().
// The pending flag is cleared before rescheduleThreads() is invoked.
void evalReschedule() {
    // Nothing was requested, so leave the current thread alone
    if (!needReschedule) {
        return;
    }

    needReschedule = false;
    rescheduleThreads();
}
Handle makeObject(KernelObjectType type) {
if (handleCounter > KernelHandles::Max) [[unlikely]] {
Helpers::panic("Hlep we somehow created enough kernel objects to overflow this thing");

View file

@ -143,6 +143,7 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
break;
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
case ShaderOpcodes::DPH: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
@ -179,6 +180,10 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
case ShaderOpcodes::SLTI:
recSLT(shaderUnit, instruction); break;
case ShaderOpcodes::SGE:
case ShaderOpcodes::SGEI:
recSGE(shaderUnit, instruction); break;
default:
Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode);
}
@ -525,6 +530,30 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
// Attach 1.0 to the w component of src1
if (haveSSE4_1) {
blendps(src1_xmm, xword[rip + onesVector], 0b1000);
} else {
movaps(scratch1, src1_xmm);
unpckhps(scratch1, xword[rip + onesVector]);
unpcklpd(src1_xmm, scratch1);
}
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recMAX(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);
@ -656,6 +685,24 @@ void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) {
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recSGE(const PICAShader& shader, u32 instruction) {
const bool isSGEI = (instruction >> 26) == ShaderOpcodes::SGEI;
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = isSGEI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
const u32 src2 = isSGEI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
loadRegister<1>(src1_xmm, shader, src1, isSGEI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, isSGEI ? idx : 0, operandDescriptor);
// SSE does not have a cmpgeps instruction so we turn src1 >= src2 to src2 <= src1, result in src2
cmpleps(src2_xmm, src1_xmm);
andps(src2_xmm, xword[rip + onesVector]);
storeRegister(src2_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);

View file

@ -87,7 +87,7 @@ void Kernel::arbitrateAddress() {
Helpers::panic("ArbitrateAddress: Unimplemented type %s", arbitrationTypeToString(type));
}
rescheduleThreads();
requireReschedule();
}
// Signal up to "threadCount" threads waiting on the arbiter indicated by "waitingAddress"

View file

@ -35,22 +35,15 @@ bool Kernel::signalEvent(Handle handle) {
// Check if there's any thread waiting on this event
if (event->waitlist != 0) {
// One-shot events get cleared once they are acquired by some thread and only wake up 1 thread at a time
wakeupAllThreads(event->waitlist, handle);
event->waitlist = 0; // No threads waiting;
if (event->resetType == ResetType::OneShot) {
int index = wakeupOneThread(event->waitlist, handle); // Wake up one thread with the highest priority
event->waitlist ^= (1ull << index); // Remove thread from waitlist
event->fired = false;
} else {
wakeupAllThreads(event->waitlist, handle);
event->waitlist = 0; // No threads waiting;
}
// We must reschedule our threads if we signalled one. Some games such as FE: Awakening rely on this
// If this does not happen, we can have phenomena such as a thread waking up a higher priority thread,
// and the higher priority thread just never running
rescheduleThreads();
}
rescheduleThreads();
return true;
}
@ -121,7 +114,6 @@ void Kernel::waitSynchronization1() {
if (!shouldWaitOnObject(object)) {
acquireSyncObject(object, threads[currentThreadIndex]); // Acquire the object since it's ready
regs[0] = Result::Success;
rescheduleThreads();
} else {
// Timeout is 0, don't bother waiting, instantly timeout
if (ns == 0) {
@ -141,7 +133,7 @@ void Kernel::waitSynchronization1() {
// Add the current thread to the object's wait list
object->getWaitlist() |= (1ull << currentThreadIndex);
switchToNextThread();
requireReschedule();
}
}
@ -204,14 +196,13 @@ void Kernel::waitSynchronizationN() {
auto& t = threads[currentThreadIndex];
// We only need to wait on one object. Easy...?!
// We only need to wait on one object. Easy.
if (!waitAll) {
// If there's ready objects, acquire the first one and return
if (oneObjectReady) {
regs[0] = Result::Success;
regs[1] = firstReadyObjectIndex; // Return index of the acquired object
acquireSyncObject(waitObjects[firstReadyObjectIndex].second, t); // Acquire object
rescheduleThreads();
return;
}
@ -229,8 +220,8 @@ void Kernel::waitSynchronizationN() {
waitObjects[i].second->getWaitlist() |= (1ull << currentThreadIndex); // And add the thread to the object's waitlist
}
switchToNextThread();
requireReschedule();
} else {
Helpers::panic("WaitSynchronizatioN with waitAll");
Helpers::panic("WaitSynchronizationN with waitAll");
}
}

View file

@ -61,6 +61,8 @@ void Kernel::serviceSVC(u32 svc) {
case 0x3D: outputDebugString(); break;
default: Helpers::panic("Unimplemented svc: %X @ %08X", svc, regs[15]); break;
}
evalReschedule();
}
void Kernel::setVersion(u8 major, u8 minor) {
@ -140,6 +142,8 @@ void Kernel::reset() {
threadIndices.clear();
serviceManager.reset();
needReschedule = false;
// Allocate handle #0 to a dummy object and make a main process object
makeObject(KernelObjectType::Dummy);
currentProcess = makeProcess(1); // Use ID = 1 for main process

View file

@ -76,6 +76,11 @@ void Kernel::sendSyncRequest() {
u32 messagePointer = getTLSPointer() + 0x80; // The message is stored starting at TLS+0x80
logSVC("SendSyncRequest(session handle = %X)\n", handle);
// Service calls via SendSyncRequest and file access need to put the caller to sleep for a given amount of time
// to make sure that the other threads don't get starved. Various games rely on this (including Sonic Boom: Shattering Crystal it seems)
constexpr u64 syncRequestDelayNs = 39000;
sleepThread(syncRequestDelayNs);
// The sync request is being sent at a service rather than whatever port, so have the service manager intercept it
if (KernelHandles::isServiceHandle(handle)) {
// The service call might cause a reschedule and change threads. Hence, set r0 before executing the service call

View file

@ -82,32 +82,26 @@ std::optional<int> Kernel::getNextThread() {
return std::nullopt;
}
// Switches execution to whichever thread getNextThread() selects.
// If no thread is selected, this logs the situation, asserts that at least one thread is alive and panics;
// the code after the panic picks a random non-dead thread as a last resort.
void Kernel::switchToNextThread() {
    const std::optional<int> candidate = getNextThread();

    // Common path: the scheduler found a thread to run
    if (candidate.has_value()) {
        switchThread(candidate.value());
        return;
    }

    log("Kernel tried to switch to the next thread but none found. Switching to random thread\n");
    assert(aliveThreadCount != 0);
    Helpers::panic("rpog");

    // Keep rolling until we land on a thread that is not dead
    int index;
    do {
        index = rand() % threadCount;
    } while (threads[index].status == ThreadStatus::Dead);  // TODO: Pray this doesn't hang

    switchThread(index);
}
// See if there;s a higher priority, ready thread and switch to that
// See if there is a higher priority, ready thread and switch to that
void Kernel::rescheduleThreads() {
Thread& current = threads[currentThreadIndex]; // Current running thread
// If the current thread is running and hasn't gone to sleep or whatever, set it to Ready instead of Running
// So that getNextThread will evaluate it properly
if (current.status == ThreadStatus::Running) {
current.status = ThreadStatus::Ready;
}
ThreadStatus currentStatus = current.status;
std::optional<int> newThreadIndex = getNextThread();
if (newThreadIndex.has_value() && newThreadIndex.value() != currentThreadIndex) {
threads[currentThreadIndex].status = ThreadStatus::Ready;
// Case 1: A thread can run
if (newThreadIndex.has_value()) {
switchThread(newThreadIndex.value());
}
// Case 2: No other thread can run, straight to the idle thread
else {
switchThread(idleThreadIndex);
}
}
@ -184,6 +178,7 @@ void Kernel::releaseMutex(Mutex* moo) {
// If the lock count reached 0 then the thread no longer owns the mootex and it can be given to a new one
if (moo->lockCount == 0) {
moo->locked = false;
if (moo->waitlist != 0) {
int index = wakeupOneThread(moo->waitlist, moo->handle); // Wake up one thread and get its index
moo->waitlist ^= (1ull << index); // Remove thread from waitlist
@ -194,7 +189,7 @@ void Kernel::releaseMutex(Mutex* moo) {
moo->ownerThread = index;
}
rescheduleThreads();
requireReschedule();
}
}
@ -210,7 +205,7 @@ void Kernel::sleepThreadOnArbiter(u32 waitingAddress) {
t.status = ThreadStatus::WaitArbiter;
t.waitingAddress = waitingAddress;
switchToNextThread();
requireReschedule();
}
// Acquires an object that is **ready to be acquired** without waiting on it
@ -226,7 +221,13 @@ void Kernel::acquireSyncObject(KernelObject* object, const Thread& thread) {
case KernelObjectType::Mutex: {
Mutex* moo = object->getData<Mutex>();
moo->locked = true; // Set locked to true, whether it's false or not because who cares
// Only reschedule if we're acquiring the mutex for the first time
if (!moo->locked) {
moo->locked = true;
requireReschedule();
}
// Increment lock count by 1. If a thread acquires a mootex multiple times, it needs to release it until count == 0
// For the mootex to be free.
moo->lockCount++;
@ -338,20 +339,31 @@ void Kernel::wakeupAllThreads(u64 waitlist, Handle handle) {
void Kernel::sleepThread(s64 ns) {
if (ns < 0) {
Helpers::panic("Sleeping a thread for a negative amount of ns");
} else if (ns == 0) { // Used when we want to force a thread switch
std::optional<int> newThreadIndex = getNextThread();
// If there's no other thread waiting, don't bother yielding
if (newThreadIndex.has_value()) {
threads[currentThreadIndex].status = ThreadStatus::Ready;
switchThread(newThreadIndex.value());
}
} else { // If we're sleeping for > 0 ns
} else if (ns == 0) {
// TODO: This is garbage, but it works so eh we can keep it for now
Thread& t = threads[currentThreadIndex];
// See if a thread other than this and the idle thread is waiting to run by temporarily marking the current thread as dead and searching
// If there is another thread to run, then run it. Otherwise, go back to this thread, not to the idle thread
t.status = ThreadStatus::Dead;
auto nextThreadIndex = getNextThread();
t.status = ThreadStatus::Ready;
if (nextThreadIndex.has_value()) {
const auto index = nextThreadIndex.value();
if (index != idleThreadIndex) {
switchThread(index);
}
}
} else { // If we're sleeping for >= 0 ns
Thread& t = threads[currentThreadIndex];
t.status = ThreadStatus::WaitSleep;
t.waitingNanoseconds = ns;
t.sleepTick = cpu.getTicks();
switchToNextThread();
requireReschedule();
}
}
@ -374,7 +386,7 @@ void Kernel::createThread() {
regs[0] = Result::Success;
regs[1] = makeThread(entrypoint, initialSP, priority, id, arg, ThreadStatus::Ready);
rescheduleThreads();
requireReschedule();
}
// void SleepThread(s64 nanoseconds)
@ -448,7 +460,7 @@ void Kernel::setThreadPriority() {
}
}
sortThreads();
rescheduleThreads();
requireReschedule();
}
void Kernel::exitThread() {
@ -472,7 +484,7 @@ void Kernel::exitThread() {
t.threadsWaitingForTermination = 0; // No other threads waiting
}
switchToNextThread();
requireReschedule();
}
void Kernel::svcCreateMutex() {

View file

@ -357,6 +357,8 @@ void Emulator::run() {
hid.updateInputs(cpu.getTicks());
}
// TODO: Should this be uncommented?
// kernel.evalReschedule();
// Update inputs in the HID module
SDL_GL_SwapWindow(window);

View file

@ -228,10 +228,18 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
));
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) error_unimpl = true;
vec3 half_vector;
vec3 half_vector = normalize(normalize(light_vector) + view);
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) {
error_unimpl = true;
// half_vector = normalize(normalize(light_vector + v_view) + view);
}
// Directional light
else {
half_vector = normalize(normalize(light_vector) + view);
}
for (int c = 0; c < 7; c++) {
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0) {