Merge pull request #61 from skylersaleh/lighting_impl

Implement Fragment Lighting (and clipping planes)
2025-07-01 12:56:21 +12:00 · 2023-07-04 20:52:16 +03:00 · 2023-07-04 20:52:16 +03:00 · ee49f89779
commit ee49f89779
parent 14d287dbd7 aa27389473
20 changed files with 582 additions and 98 deletions
--- a/.github/workflows/Linux_Build.yml
+++ b/.github/workflows/Linux_Build.yml
@ -26,7 +26,7 @@ jobs:
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_USER_BUILD=ON
    - name: Build
      # Build your program with the given configuration
--- a/.github/workflows/MacOS_Build.yml
+++ b/.github/workflows/MacOS_Build.yml
@ -23,13 +23,10 @@ jobs:
    - name: Fetch submodules
      run: git submodule update --init --recursive
    - name: Install LLVM # MacOS comes with "AppleClang" instead of regular Clang, and it can't build the project because it lacks proper C++20 support
      run: brew install llvm
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON
    - name: Build
      # Build your program with the given configuration
--- a/.github/workflows/Windows_Build.yml
+++ b/.github/workflows/Windows_Build.yml
@ -26,7 +26,7 @@ jobs:
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON
    - name: Build
      # Build your program with the given configuration
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
-if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fbracket-depth=4096")
 endif()
@ -13,8 +13,14 @@ endif()
 project(Alber)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
 endif() 
 option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF)
 option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
 option(ENABLE_LTO "Enable link-time optimization" OFF)
 option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)
 include_directories(${PROJECT_SOURCE_DIR}/include/)
 include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
@ -159,7 +165,7 @@ source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES})
 add_executable(Alber ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES}
 ${PICA_SOURCE_FILES} ${RENDERER_GL_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES})
-if(ENABLE_LTO)
+if(ENABLE_LTO OR ENABLE_USER_BUILD)
  set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
 endif()
@ -167,4 +173,12 @@ target_link_libraries(Alber PRIVATE dynarmic SDL2-static glad cryptopp)
 if(GPU_DEBUG_INFO)
  target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1)
-endif()
+endif()
 if(ENABLE_USER_BUILD)
    target_compile_definitions(Alber PRIVATE PANDA3DS_USER_BUILD=1)
 endif()
 if(ENABLE_USER_BUILD OR DISABLE_PANIC_DEV)
    target_compile_definitions(Alber PRIVATE PANDA3DS_LIMITED_PANICS=1)
 endif()
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@ -69,7 +69,18 @@ class GPU {
 	Renderer renderer;
 	PicaVertex getImmediateModeVertex();
-public:
+
  public:
 	// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
 	// Encoded in PICA native format
 	static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256;
 	std::array<uint32_t, LightingLutSize> lightingLUT;
 	// Used to prevent uploading the lighting_lut on every draw call
 	// Set to true when the CPU writes to the lighting_lut
 	// Set to false by the renderer when the lighting_lut is uploaded to the GPU
 	bool lightingLUTDirty = false;
 	GPU(Memory& mem);
 	void initGraphicsContext() { renderer.initGraphicsContext(); }
 	void getGraphicsContext() { renderer.getGraphicsContext(); }
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@ -10,6 +10,13 @@ namespace PICA {
 			ViewportHeight = 0x43,
 			ViewportInvh = 0x44,
 			// Clipping plane control
 			ClipEnable = 0x47,
 			ClipData0 = 0x48,
 			ClipData1 = 0x49,
 			ClipData2 = 0x4A,
 			ClipData3 = 0x4B,
 			DepthScale = 0x4D,
 			DepthOffset = 0x4E,
 			ShaderOutputCount = 0x4F,
@ -55,6 +62,17 @@ namespace PICA {
 			ColourBufferLoc = 0x11D,
 			FramebufferSize = 0x11E,
 			//LightingRegs
 			LightingLUTIndex =  0x01C5,
 			LightingLUTData0 =  0x01C8,
 			LightingLUTData1 =  0x01C9,
 			LightingLUTData2 =  0x01CA,
 			LightingLUTData3 =  0x01CB,
 			LightingLUTData4 =  0x01CC,
 			LightingLUTData5 =  0x01CD,
 			LightingLUTData6 =  0x01CE,
 			LightingLUTData7 =  0x01CF,
 			// Geometry pipeline registers
 			VertexAttribLoc = 0x200,
 			AttribFormatLow = 0x201,
@ -156,6 +174,34 @@ namespace PICA {
 		};
 	}
 	// Lighting LUT identifiers. LUT_Count is used to size the GPU's lighting LUT storage
 	// (256 entries per LUT), and the LUT ID field of the lighting LUT index register is
 	// compared against it before any LUT access.
 	namespace Lights {
 		enum : u32 {
 			LUT_D0 = 0,
 			LUT_D1,
 			LUT_FR,
 			LUT_RB,
 			LUT_RG,
 			LUT_RR,
 			// IDs 0x6 and 0x7 are not assigned here.
 			// One LUT per light starting at 0x8; the fragment shader addresses the spotlight LUT as (light + 8)
 			LUT_SP0 = 0x8,
 			LUT_SP1,
 			LUT_SP2,
 			LUT_SP3,
 			LUT_SP4,
 			LUT_SP5,
 			LUT_SP6,
 			LUT_SP7,
 			// One LUT per light starting at 0x10 -- presumably distance attenuation; TODO confirm against hardware docs
 			LUT_DA0 = 0x10,
 			LUT_DA1,
 			LUT_DA2,
 			LUT_DA3,
 			LUT_DA4,
 			LUT_DA5,
 			LUT_DA6,
 			LUT_DA7,
 			// One past the last LUT ID (0x18); doubles as the number of LUT rows to allocate
 			LUT_Count
 		};
 	}
 	enum class TextureFmt : u32 {
 		RGBA8 = 0x0,
 		RGB8 = 0x1,
--- a/include/helpers.hpp
+++ b/include/helpers.hpp
@ -30,24 +30,31 @@ using s32 = std::int32_t;
 using s64 = std::int64_t;
 namespace Helpers {
-	[[noreturn]] static void panic(const char* fmt, ...) {
+	// Unconditional panic, unlike panicDev which does not panic on user builds
-		std::va_list args;
+	template <class... Args>
-		va_start(args, fmt);
+	[[noreturn]] static void panic(const char* fmt, Args&&... args) {
 		std::cout << termcolor::on_red << "[FATAL] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
 		va_end(args);
 		exit(1);
 	}
 #ifdef PANDA3DS_LIMITED_PANICS
 	template <class... Args>
 	static void panicDev(const char* fmt, Args&&... args) {}
 #else
 	template <class... Args>
 	[[noreturn]] static void panicDev(const char* fmt, Args&&... args) {
 		panic(fmt, args...);
 	}
 #endif
-	static void warn(const char* fmt, ...) {
+	template <class... Args>
-		std::va_list args;
+	static void warn(const char* fmt, Args&&... args) {
 		va_start(args, fmt);
 		std::cout << termcolor::on_red << "[Warning] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
 		va_end(args);
 	}
 	static constexpr bool buildingInDebugMode() {
@ -57,6 +64,13 @@ namespace Helpers {
 		return true;
 	}
 	static constexpr bool isUserBuild() {
 #ifdef PANDA3DS_USER_BUILD
 		return true;
 #endif
 		return false;
 	}
 	static void debug_printf(const char* fmt, ...) {
 		if constexpr (buildingInDebugMode()) {
 			std::va_list args;
--- a/include/opengl.hpp
+++ b/include/opengl.hpp
@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2022 PCSX-Redux authors                                 *
+ *   Copyright (C) 2022 PCSX-Redux & Panda3DS authors                      *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
@ -524,6 +524,9 @@ namespace OpenGL {
    static void enableStencil() { glEnable(GL_STENCIL_TEST); }
    static void disableStencil() { glDisable(GL_STENCIL_TEST); }
    static void enableClipPlane(GLuint index) { glEnable(GL_CLIP_DISTANCE0 + index); }
 	static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); }
    static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast<GLenum>(func)); }
    enum Primitives {
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@ -32,6 +32,7 @@ class Renderer {
 	GLint textureEnvScaleLoc = -1;
 	GLint textureEnvUpdateBufferLoc = -1;
 	GLint textureEnvBufferColorLoc = -1;
 	GLint picaRegLoc = -1;
 	// Depth configuration uniform locations
 	GLint depthOffsetLoc = -1;
@ -66,6 +67,7 @@ class Renderer {
 	const std::array<u32, regNum>& regs;
 	OpenGL::Texture screenTexture;
 	GLuint lightLUTTextureArray;
 	OpenGL::Framebuffer screenFramebuffer;
 	OpenGL::Framebuffer getColourFBO();
@ -76,6 +78,7 @@ class Renderer {
 	void bindDepthBuffer();
 	void setupTextureEnvState();
 	void bindTexturesToSlots();
 	void updateLightingLUT();
  public:
 	Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
--- a/include/services/hid.hpp
+++ b/include/services/hid.hpp
@ -63,6 +63,8 @@ class HIDService {
 	MAKE_LOG_FUNCTION(log, hidLogger)
 	// Service commands
 	void disableAccelerometer(u32 messagePointer);
 	void disableGyroscopeLow(u32 messagePointer);
 	void enableAccelerometer(u32 messagePointer);
 	void enableGyroscopeLow(u32 messagePointer);
 	void getGyroscopeLowCalibrateParam(u32 messagePointer);
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@ -21,6 +21,7 @@ void GPU::reset() {
 	shaderUnit.reset();
 	shaderJIT.reset();
 	std::memset(vram, 0, vramSize);
 	lightingLUT.fill(0);
 	totalAttribCount = 0;
 	fixedAttribMask = 0;
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@ -24,18 +24,36 @@ void GPU::writeReg(u32 address, u32 value) {
 }
 // Read a GPU internal register.
 // Reads from LightingLUTData0..LightingLUTData7 do not return the register contents: they return the
 // lighting LUT entry currently selected by the LightingLUTIndex register (or 0xffffffff if the selected
 // LUT ID is out of range) and then advance the low 8 bits of LightingLUTIndex by one, wrapping at 256.
 // All other valid indices return regs[index] unchanged.
 u32 GPU::readInternalReg(u32 index) {
-	if (index > regNum) {
+	using namespace PICA::InternalRegs;
 	// NOTE(review): this check lets index == regNum through. If regs holds exactly regNum entries, the
 	// fall-through regs[index] below would read one past the end -- confirm whether this should be >=.
 	if (index > regNum) [[unlikely]] {
 		Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
 		return 0;
 	}
 	else if (index >= LightingLUTData0 && index <= LightingLUTData7) [[unlikely]] {
 		// Note: this local shadows the `index` parameter for the rest of this branch
 		const uint32_t index = regs[LightingLUTIndex];  // Get full LUT index register
 		const uint32_t lutID = getBits<8, 5>(index);    // Get which LUT we're actually reading from
 		uint32_t lutIndex = getBits<0, 8>(index);       // And get the index inside the LUT we're reading from
 		uint32_t value = 0xffffffff;                    // Value returned if lutID does not name a valid LUT
 		if (lutID < PICA::Lights::LUT_Count) {
 			value = lightingLUT[lutID * 256 + lutIndex];
 		}
 		// Increment the bottom 8 bits of the lighting LUT index register
 		lutIndex += 1;
 		regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff);
 		return value;
 	}
 	return regs[index];
 }
 void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 	using namespace PICA::InternalRegs;
-	if (index > regNum) {
+	if (index > regNum) [[unlikely]] {
 		Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value);
 		return;
 	}
@ -91,6 +109,30 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
 		}
 		case LightingLUTData0:
 		case LightingLUTData1:
 		case LightingLUTData2:
 		case LightingLUTData3:
 		case LightingLUTData4:
 		case LightingLUTData5:
 		case LightingLUTData6:
 		case LightingLUTData7:{
 			const uint32_t index = regs[LightingLUTIndex];  // Get full LUT index register
 			const uint32_t lutID = getBits<8, 5>(index);    // Get which LUT we're actually writing to
 			uint32_t lutIndex = getBits<0, 8>(index);       // And get the index inside the LUT we're writing to
 			if (lutID < PICA::Lights::LUT_Count) {
 				lightingLUT[lutID * 256 + lutIndex] = newValue;
 				lightingLUTDirty = true;
 			}
 			// Increment the bottom 8 bits of the lighting LUT index register
 			lutIndex += 1;
 			regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff);
 			break;
 		}
 		case VertexFloatUniformIndex:
 			shaderUnit.vs.setFloatUniformIndex(value);
 			break;
--- a/src/core/kernel/directory_operations.cpp
+++ b/src/core/kernel/directory_operations.cpp
@ -33,7 +33,7 @@ void Kernel::readDirectory(u32 messagePointer, Handle directory) {
 	const u32 entryCount = mem.read32(messagePointer + 4);
 	const u32 outPointer = mem.read32(messagePointer + 12);
 	logFileIO("Directory::Read (handle = %X, entry count = %d, out pointer = %08X)\n", directory, entryCount, outPointer);
-	Helpers::panic("Unimplemented FsDir::Read");
+	Helpers::panicDev("Unimplemented FsDir::Read");
 	mem.write32(messagePointer + 4, Result::Success);
 	mem.write32(messagePointer + 8, 0);
--- a/src/core/kernel/idle_thread.cpp
+++ b/src/core/kernel/idle_thread.cpp
@ -59,12 +59,12 @@ void Kernel::setupIdleThread() {
 	t.fpscr = FPSCR::ThreadDefault;
 	// Our idle thread should have as low of a priority as possible, because, well, it's an idle thread.
-	// We handle this by giving it a priority of 0xff, which is lower than is actually allowed for user threads
+	// We handle this by giving it a priority of 0x40, which is lower than is actually allowed for user threads
-	// (High priority value = low priority)
+	// (High priority value = low priority). This is the same priority used in the retail kernel.
-	t.priority = 0xff;
+	t.priority = 0x40;
 	t.status = ThreadStatus::Ready;
 	// Add idle thread to the list of thread indices
 	threadIndices.push_back(idleThreadIndex);
 	sortThreads();
-}
+}
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@ -44,24 +44,21 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 	exheaderInfo.offset = info.offset + 0x200;
 	exheaderInfo.size = exheaderSize;
 	exheaderInfo.hashRegionSize = 0;
 	exheaderInfo.encryptionInfo = std::nullopt;
 	exeFS.offset = info.offset + u64(*(u32*)&header[0x1A0]) * mediaUnit;
 	exeFS.size = u64(*(u32*)&header[0x1A4]) * mediaUnit;
 	exeFS.hashRegionSize = u64(*(u32*)&header[0x1A8]) * mediaUnit;
 	exeFS.encryptionInfo = std::nullopt;
 	romFS.offset = info.offset + u64(*(u32*)&header[0x1B0]) * mediaUnit;
 	romFS.size = u64(*(u32*)&header[0x1B4]) * mediaUnit;
 	romFS.hashRegionSize = u64(*(u32*)&header[0x1B8]) * mediaUnit;
 	romFS.encryptionInfo = std::nullopt;
 	// Shows whether we got the primary and secondary keys correctly
 	bool gotCryptoKeys = true;
 	if (encrypted) {
 		if (!aesEngine.haveKeys()) {
 			Helpers::panic(
 				"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
 				"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
 			);
 			return false;
 		}
 		Crypto::AESKey primaryKeyY;
 		Crypto::AESKey secondaryKeyY;
 		std::memcpy(primaryKeyY.data(), header, primaryKeyY.size());
@ -69,44 +66,36 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (!seedCrypto) {
 			secondaryKeyY = primaryKeyY;
 		} else {
-			Helpers::panic("Seed crypto is not supported");
+			Helpers::warn("Seed crypto is not supported");
-			return false;
+			gotCryptoKeys = false;
 		}
 		auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY);
 		if (!primaryResult.first) {
 			Helpers::panic("getPrimaryKey failed!");
 			return false;
 		}
 		Crypto::AESKey primaryKey = primaryResult.second;
 		auto secondaryResult = getSecondaryKey(aesEngine, secondaryKeyY);
-		if (!secondaryResult.first) {
+		if (!primaryResult.first || !secondaryResult.first) {
-			Helpers::panic("getSecondaryKey failed!");
+			gotCryptoKeys = false;
-			return false;
+		} else {
 			Crypto::AESKey primaryKey = primaryResult.second;
 			Crypto::AESKey secondaryKey = secondaryResult.second;
 			EncryptionInfo encryptionInfoTmp;
 			encryptionInfoTmp.normalKey = primaryKey;
 			encryptionInfoTmp.initialCounter.fill(0);
 			for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
 				encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
 			}
 			encryptionInfoTmp.initialCounter[8] = 1;
 			exheaderInfo.encryptionInfo = encryptionInfoTmp;
 			encryptionInfoTmp.initialCounter[8] = 2;
 			exeFS.encryptionInfo = encryptionInfoTmp;
 			encryptionInfoTmp.normalKey = secondaryKey;
 			encryptionInfoTmp.initialCounter[8] = 3;
 			romFS.encryptionInfo = encryptionInfoTmp;
 		}
 		Crypto::AESKey secondaryKey = secondaryResult.second;
 		EncryptionInfo encryptionInfoTmp;
 		encryptionInfoTmp.normalKey = primaryKey;
 		encryptionInfoTmp.initialCounter.fill(0);
 		for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
 			encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
 		}
 		encryptionInfoTmp.initialCounter[8] = 1;
 		exheaderInfo.encryptionInfo = encryptionInfoTmp;
 		encryptionInfoTmp.initialCounter[8] = 2;
 		exeFS.encryptionInfo = encryptionInfoTmp;
 		encryptionInfoTmp.normalKey = secondaryKey;
 		encryptionInfoTmp.initialCounter[8] = 3;
 		romFS.encryptionInfo = encryptionInfoTmp;
 	}
 	if (exheaderSize != 0) {
@ -125,9 +114,28 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (u32(programID) == u32(jumpID) && encrypted) {
 			printf("NCSD is supposedly ecrypted but not actually encrypted\n");
 			encrypted = false;
 			// Cartridge is not actually encrypted, set all of our encryption info structures to nullopt
 			exheaderInfo.encryptionInfo = std::nullopt;
 			romFS.encryptionInfo = std::nullopt;
 			exeFS.encryptionInfo = std::nullopt;
 		}
 		// If it's truly encrypted, we need to read section again.
 		if (encrypted) {
 			if (!aesEngine.haveKeys()) {
 				Helpers::panic(
 					"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
 					"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
 				);
 				return false;
 			}
 			if (!gotCryptoKeys) {
 				Helpers::panic("ROM is encrypted but it seems we couldn't get either the primary or the secondary key");
 				return false;
 			}
 			auto [success, bytes] = readFromFile(file, exheaderInfo, &exheader[0], 0, exheaderSize);
 			if (!success || bytes != exheaderSize) {
 				printf("Failed to read Extended NCCH header\n");
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@ -3,6 +3,7 @@
 #include "resource_limits.hpp"
 #include <cassert>
 #include <chrono> // For time since epoch
 #include <ctime>
 using namespace KernelMemoryTypes;
@ -424,9 +425,20 @@ void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) {
 u64 Memory::timeSince3DSEpoch() {
 	using namespace std::chrono;
-	// ms since Jan 1 1970
+	std::time_t rawTime = std::time(nullptr); // Get current UTC time
-	milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
+	auto localTime = std::localtime(&rawTime); // Convert to local time
-	// ms between Jan 1 1900 and Jan 1 1970 (2208988800 seconds elapsed between the two)
+
-	constexpr u64 offset = 2208988800ull * 1000;
+	bool daylightSavings = localTime->tm_isdst > 0; // Get if time includes DST
-	return ms.count() + offset;
+	localTime = std::gmtime(&rawTime);
 	// Use gmtime + mktime to calculate difference between local time and UTC
 	auto timezoneDifference = rawTime - std::mktime(localTime);
 	if (daylightSavings) {
 		timezoneDifference += 60ull * 60ull; // Add 1 hour (60 seconds * 60 minutes)
 	}
 	// seconds between Jan 1 1900 and Jan 1 1970
 	constexpr u64 offset = 2208988800ull;
 	milliseconds ms = duration_cast<milliseconds>(seconds(rawTime + timezoneDifference + offset));
 	return ms.count();
 }
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@ -5,29 +5,42 @@
 using namespace Floats;
 using namespace Helpers;
-
+using namespace PICA;
 // This is all hacked up to display our first triangle
 const char* vertexShader = R"(
 	#version 410 core
-	layout (location = 0) in vec4 a_coords;
+	layout (location = 0) in vec4  a_coords;
-	layout (location = 1) in vec4 a_vertexColour;
+	layout (location = 1) in vec4  a_quaternion;
-	layout (location = 2) in vec2 a_texcoord0;
+	layout (location = 2) in vec4  a_vertexColour;
-	layout (location = 3) in vec2 a_texcoord1;
+	layout (location = 3) in vec2  a_texcoord0;
-	layout (location = 4) in float a_texcoord0_w;
+	layout (location = 4) in vec2  a_texcoord1;
-	layout (location = 5) in vec2 a_texcoord2;
+	layout (location = 5) in float a_texcoord0_w;
 	layout (location = 6) in vec3  a_view;
 	layout (location = 7) in vec2  a_texcoord2;
 	out vec3 v_normal;
 	out vec3 v_tangent;
 	out vec3 v_bitangent;
 	out vec4 v_colour;
 	out vec3 v_texcoord0;
 	out vec2 v_texcoord1;
 	out vec3 v_view;
 	out vec2 v_texcoord2;
 	flat out vec4 v_textureEnvColor[6];
 	flat out vec4 v_textureEnvBufferColor;
 	out float gl_ClipDistance[2];
 	// TEV uniforms
 	uniform uint u_textureEnvColor[6];
 	uniform uint u_textureEnvBufferColor;
 	uniform uint u_picaRegs[0x200 - 0x47];
 	// Helper so that the implementation of u_pica_regs can be changed later
 	uint readPicaReg(uint reg_addr){
 		return u_picaRegs[reg_addr - 0x47];
 	}
 	vec4 abgr8888ToVec4(uint abgr) {
 		const float scale = 1.0 / 255.0;
@ -40,6 +53,31 @@ const char* vertexShader = R"(
 		);
 	}
 	vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){
 		vec3 u = q.xyz;
 		float s = q.w;
 		return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v  + 2.0 * s * cross(u, v);
 	}
 	// Convert an arbitrary-width floating point literal to an f32
 	float decodeFP(uint hex, uint E, uint M){
 		uint width = M + E + 1u;
 		uint bias = 128u - (1u << (E - 1u));
 		uint exponent = (hex >> M) & ((1u << E) - 1u);
 		uint mantissa = hex & ((1u << M) - 1u);
 		uint sign = (hex >> (E + M)) << 31u;
 		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
 			if (exponent == (1u << E) - 1u) exponent = 255u;
 			else exponent += bias;
 			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
 		} else {
 			hex = sign;
 		}
        return uintBitsToFloat(hex);
 	}
 	void main() {
 		gl_Position = a_coords;
 		v_colour = a_vertexColour;
@ -48,21 +86,45 @@ const char* vertexShader = R"(
 		v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
 		v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
 		v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
 		v_view = a_view; 
 		v_normal    = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
 		v_tangent   = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
 		v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));
 		for (int i = 0; i < 6; i++) {
 			v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
 		}
 		v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor);
 		// Parse clipping plane registers
 		// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 
 		// With n = (A, B, C) being the normal vector and D being the origin point distance
 		// Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1]
 		vec4 clipData = vec4(
 			decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16),
 			decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16),
 			decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16),
 			decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16)
 		);
 		// There's also another, always-on clipping plane based on vertex z
 		gl_ClipDistance[0] = -a_coords.z;
 		gl_ClipDistance[1] = dot(clipData, a_coords);
 	}
 )";
 const char* fragmentShader = R"(
 	#version 410 core
 	in vec3 v_tangent;
 	in vec3 v_normal;
 	in vec3 v_bitangent;
 	in vec4 v_colour;
 	in vec3 v_texcoord0;
 	in vec2 v_texcoord1;
 	in vec3 v_view;
 	in vec2 v_texcoord2;
 	flat in vec4 v_textureEnvColor[6];
 	flat in vec4 v_textureEnvBufferColor;
@ -87,6 +149,14 @@ const char* fragmentShader = R"(
 	uniform sampler2D u_tex0;
 	uniform sampler2D u_tex1;
 	uniform sampler2D u_tex2;
 	uniform sampler1DArray u_tex_lighting_lut;
 	uniform uint u_picaRegs[0x200 - 0x47];
 	// Helper so that the implementation of u_pica_regs can be changed later
 	uint readPicaReg(uint reg_addr){
 		return u_picaRegs[reg_addr - 0x47];
 	}
 	vec4 tevSources[16];
 	vec4 tevNextPreviousBuffer;
@ -190,9 +260,196 @@ const char* fragmentShader = R"(
 		return result;
 	}
 	#define D0_LUT 0u
 	#define D1_LUT 1u
 	#define SP_LUT 2u
 	#define FR_LUT 3u
 	#define RB_LUT 4u
 	#define RG_LUT 5u
 	#define RR_LUT 6u
 	float lutLookup(uint lut, uint light, float value){
 		if (lut >= FR_LUT && lut <= RR_LUT)
 			lut -= 1;
 		if (lut==SP_LUT)
 			lut = light + 8;
 		return texture(u_tex_lighting_lut, vec2(value, lut)).r; 
 	}
 	vec3 regToColor(uint reg) {
 		// Normalization scale to convert from [0...255] to [0.0...1.0]
 		const float scale = 1.0 / 255.0;
 		return scale * vec3(
 			float(bitfieldExtract(reg, 20, 8)),
 			float(bitfieldExtract(reg, 10, 8)),
 			float(bitfieldExtract(reg, 00, 8))
 		);
 	}
 	// Convert an arbitrary-width floating point literal to an f32
 	float decodeFP(uint hex, uint E, uint M){
 		uint width = M + E + 1u;
 		uint bias = 128u - (1u << (E - 1u));
 		uint exponent = (hex >> M) & ((1u << E) - 1u);
 		uint mantissa = hex & ((1u << M) - 1u);
 		uint sign = (hex >> (E + M)) << 31u;
 		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
 			if (exponent == (1u << E) - 1u) exponent = 255u;
 			else exponent += bias;
 			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
 		} else {
 			hex = sign;
 		}
        return uintBitsToFloat(hex);
 	}
 	// Implements the following algorthm: https://mathb.in/26766
 	void calcLighting(out vec4 primary_color, out vec4 secondary_color){
-		primary_color = vec4(vec3(0.5) ,1.0);
+		// Quaternions describe a transformation from surface-local space to eye space.
-		secondary_color = vec4(vec3(0.5) ,1.0);
+		// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
 		// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
 		vec3 normal    = normalize(v_normal   );
 		vec3 tangent   = normalize(v_tangent  );
 		vec3 bitangent = normalize(v_bitangent);
 		vec3 view = normalize(v_view);
 		uint GPUREG_LIGHTING_ENABLE  = readPicaReg(0x008F);
 		if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){
 			primary_color = secondary_color = vec4(1.0);
 			return;
 		}
 		uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0);
 		uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1;
 		uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9);
 		primary_color   = vec4(vec3(0.0),1.0);
 		secondary_color = vec4(vec3(0.0),1.0);
 		primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
 		uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0);
 		uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1);
 		uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3);
 		uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4);
 		uint GPUREG_LIGHTING_LUTINPUT_SCALE =  readPicaReg(0x01D2);
 		float d[7];
 		bool error_unimpl = false;
 		for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){
 			uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3);
 			uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id);
 			uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id);
 			vec3 light_vector = normalize(vec3(
 				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10),
 				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10),
 				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
 			));
 			// Positional Light
 			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0)
 				error_unimpl = true;
 			vec3 half_vector = normalize(normalize(light_vector) + view);
 			for(int c = 0; c < 7; c++){
 				if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){
 					uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
 					float scale = float(1u << scale_id);
 					if (scale_id >= 6u)
 						scale/=256.0;
 					uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
 					if (input_id == 0u) d[c] = dot(normal,half_vector);
 					else if (input_id == 1u) d[c] = dot(view,half_vector);
 					else if (input_id == 2u) d[c] = dot(normal,view);
 					else if (input_id == 3u) d[c] = dot(light_vector,normal);
 					else if (input_id == 4u){
 						uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id);
 						uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id);
 						vec3 spot_light_vector = normalize(vec3(
 							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11),
 							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11),
 							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11)
 						));
 						d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
 					} else if (input_id == 5u) {
 						d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
 						error_unimpl = true;
 					} else {
 						d[c] = 1.0;
 					}
 					d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale;
 					if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) 
 						d[c] = abs(d[c]);
 				} else {
 					d[c] = 1.0;
 				}
 			}
 			uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
 			if (lookup_config == 0) {
 				d[D1_LUT] = 0.0;
 				d[FR_LUT] = 0.0;
 				d[RG_LUT]= d[RB_LUT] = d[RR_LUT];
 			} else if(lookup_config == 1) {
 				d[D0_LUT] = 0.0;
 				d[D1_LUT] = 0.0;
 				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
 			} else if(lookup_config == 2) {
 				d[FR_LUT] = 0.0;
 				d[SP_LUT] = 0.0;
 				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
 			} else if(lookup_config == 3) {
 				d[SP_LUT] = 0.0;
 				d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0;
 			} else if (lookup_config == 4) {
 				d[FR_LUT] = 0.0;
 			} else if (lookup_config == 5) {
 				d[D1_LUT] = 0.0;
 			} else if (lookup_config == 6) {
 				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
 			}
 			float distance_factor = 1.0; // a
 			float indirect_factor = 1.0; // fi
 			float shadow_factor = 1.0;   // o
 			float NdotL = dot(normal, light_vector); //Li dot N
 			// Two sided diffuse
 			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL);
 			else NdotL = abs(NdotL);
 			float light_factor =  distance_factor*d[SP_LUT]*indirect_factor*shadow_factor;
 			primary_color.rgb   += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL);
 			secondary_color.rgb += light_factor * (
 									 regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
 									 regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])
 									);
 		}	
 		uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
 		uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
 		if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
 		if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
 		if (error_unimpl) {
 			secondary_color = primary_color = vec4(1.0,0.,1.0,1.0);
 		}
 	}
 	void main() {
@ -232,6 +489,8 @@ const char* fragmentShader = R"(
 		if (tevUnimplementedSourceFlag) {
 			 // fragColour = vec4(1.0, 0.0, 1.0, 1.0);
 		}
 		// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
 		// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
 		// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
@ -371,11 +630,13 @@ void Renderer::initGraphicsContext() {
 	depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
 	depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
 	depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
 	picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");
-	// Init sampler objects
+	// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
 	OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
 	OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
@ -392,21 +653,27 @@ void Renderer::initGraphicsContext() {
 	// Position (x, y, z, w) attributes
 	vao.setAttributeFloat<float>(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions));
 	vao.enableAttribute(0);
-	// Colour attribute
+	// Quaternion attribute
-	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
+	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion));
 	vao.enableAttribute(1);
-	// UV 0 attribute
+	// Colour attribute
-	vao.setAttributeFloat<float>(2, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
+	vao.setAttributeFloat<float>(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
 	vao.enableAttribute(2);
-	// UV 1 attribute
+	// UV 0 attribute
-	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
+	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
 	vao.enableAttribute(3);
-	// UV 0 W-component attribute
+	// UV 1 attribute
-	vao.setAttributeFloat<float>(4, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
+	vao.setAttributeFloat<float>(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
 	vao.enableAttribute(4);
-	// UV 2 attribute
+	// UV 0 W-component attribute
-	vao.setAttributeFloat<float>(5, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
+	vao.setAttributeFloat<float>(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
 	vao.enableAttribute(5);
 	// View
 	vao.setAttributeFloat<float>(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view));
 	vao.enableAttribute(6);
 	// UV 2 attribute
 	vao.setAttributeFloat<float>(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
 	vao.enableAttribute(7);
 	dummyVBO.create();
 	dummyVAO.create();
@ -414,6 +681,8 @@ void Renderer::initGraphicsContext() {
 	// Create texture and framebuffer for the 3DS screen
 	const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
 	const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
 	glGenTextures(1,&lightLUTTextureArray);
 	auto prevTexture = OpenGL::getTex2D();
 	screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
@ -543,6 +812,8 @@ void Renderer::bindTexturesToSlots() {
 		tex.bind();
 	}
 	glActiveTexture(GL_TEXTURE0 + 3);
 	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
 	glActiveTexture(GL_TEXTURE0);
 	// Update the texture unit configuration uniform if it changed
@ -552,6 +823,24 @@ void Renderer::bindTexturesToSlots() {
 		glUniform1ui(texUnitConfigLoc, texUnitConfig);
 	}
 }
 void Renderer::updateLightingLUT(){
 	std::array<u16, GPU::LightingLutSize> u16_lightinglut; 
 	for(int i = 0; i < gpu.lightingLUT.size(); i++){
 		uint64_t value =  gpu.lightingLUT[i] & ((1 << 12) - 1);
 		u16_lightinglut[i] = value * 65535 / 4095; 
 	} 
 	glActiveTexture(GL_TEXTURE0 + 3);
 	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
 	glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
 	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
 	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
 	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 	glActiveTexture(GL_TEXTURE0);
 	gpu.lightingLUTDirty = false;
 }
 void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
 	// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
@ -576,6 +865,11 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 		glUniform1ui(alphaControlLoc, alphaControl);
 	}
 	OpenGL::enableClipPlane(0); // Clipping plane 0 is always enabled
 	if (regs[PICA::InternalRegs::ClipEnable] & 1) {
 		OpenGL::enableClipPlane(1);
 	}
 	setupBlending();
 	OpenGL::Framebuffer poop = getColourFBO();
 	poop.bind(OpenGL::DrawAndReadFramebuffer);
@ -614,6 +908,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 	setupTextureEnvState();
 	bindTexturesToSlots();
 	// Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47)
 	// The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
 	glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]);
 	if (gpu.lightingLUTDirty) {
 		updateLightingLUT();
 	}
 	// TODO: Actually use this
 	float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;
 	float viewportHeight = f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0;
@ -644,7 +946,6 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 constexpr u32 topScreenBuffer = 0x1f000000;
 constexpr u32 bottomScreenBuffer = 0x1f05dc00;
 // Quick hack to display top screen for now
 void Renderer::display() {
 	OpenGL::disableScissor();
@ -700,7 +1001,9 @@ void Renderer::bindDepthBuffer() {
 		tex = depthBufferCache.add(sampleBuffer).texture.m_handle;
 	}
-	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) Helpers::panic("TODO: Should we remove stencil attachment?");
+	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) {
 		Helpers::panicDev("TODO: Should we remove stencil attachment?");
 	}
 	auto attachment = depthBufferFormat == PICA::DepthFmt::Depth24Stencil8 ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
 	glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0);
 }
@ -738,6 +1041,8 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
 	OpenGL::disableBlend();
 	OpenGL::disableDepth();
 	OpenGL::disableScissor();
 	OpenGL::disableClipPlane(0);
 	OpenGL::disableClipPlane(1);
 	displayProgram.use();
 	// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
--- a/src/core/services/apt.cpp
+++ b/src/core/services/apt.cpp
@ -81,7 +81,10 @@ void APTService::handleSyncRequest(u32 messagePointer) {
 		case APTCommands::SetApplicationCpuTimeLimit: setApplicationCpuTimeLimit(messagePointer); break;
 		case APTCommands::SetScreencapPostPermission: setScreencapPostPermission(messagePointer); break;
 		case APTCommands::TheSmashBrosFunction: theSmashBrosFunction(messagePointer); break;
-		default: Helpers::panic("APT service requested. Command: %08X\n", command);
+		default:
 			Helpers::panicDev("APT service requested. Command: %08X\n", command);
 			mem.write32(messagePointer + 4, Result::Success);
 			break;
 	}
 }
--- a/src/core/services/cecd.cpp
+++ b/src/core/services/cecd.cpp
@ -16,7 +16,10 @@ void CECDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
 		case CECDCommands::GetInfoEventHandle: getInfoEventHandle(messagePointer); break;
-		default: Helpers::panic("CECD service requested. Command: %08X\n", command);
+		default:
 			Helpers::panicDev("CECD service requested. Command: %08X\n", command);
 			mem.write32(messagePointer + 4, Result::Success);
 			break;
 	}
 }
--- a/src/core/services/hid.cpp
+++ b/src/core/services/hid.cpp
@ -7,7 +7,9 @@ namespace HIDCommands {
 	// Command words for the HID service. HIDService::handleSyncRequest reads the first word of the
 	// request and dispatches on it by comparing against these values.
 	enum : u32 {
 		GetIPCHandles = 0x000A0000,
 		EnableAccelerometer = 0x00110000,
 		DisableAccelerometer = 0x00120000,
 		EnableGyroscopeLow = 0x00130000,
 		DisableGyroscopeLow = 0x00140000,
 		GetGyroscopeLowRawToDpsCoefficient = 0x00150000,
 		GetGyroscopeLowCalibrateParam = 0x00160000
 	};
@ -36,6 +38,8 @@ void HIDService::reset() {
 void HIDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
 		case HIDCommands::DisableAccelerometer: disableAccelerometer(messagePointer); break;
 		case HIDCommands::DisableGyroscopeLow: disableGyroscopeLow(messagePointer); break;
 		case HIDCommands::EnableAccelerometer: enableAccelerometer(messagePointer); break;
 		case HIDCommands::EnableGyroscopeLow: enableGyroscopeLow(messagePointer); break;
 		case HIDCommands::GetGyroscopeLowCalibrateParam: getGyroscopeLowCalibrateParam(messagePointer); break;
@ -53,6 +57,14 @@ void HIDService::enableAccelerometer(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }
 void HIDService::disableAccelerometer(u32 messagePointer) {
 	// Marks the accelerometer as disabled and answers the request with a success result.
 	log("HID::DisableAccelerometer\n");
 	accelerometerEnabled = false;
 	// Reply layout: response header in the first word, result code in the word right after it
 	const auto replyHeader = IPC::responseHeader(0x12, 1, 0);
 	mem.write32(messagePointer, replyHeader);
 	mem.write32(messagePointer + 4, Result::Success);
 }
 void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	log("HID::EnableGyroscopeLow\n");
 	gyroEnabled = true;
@ -61,6 +73,14 @@ void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }
 void HIDService::disableGyroscopeLow(u32 messagePointer) {
 	// Marks the gyroscope as disabled and answers the request with a success result.
 	log("HID::DisableGyroscopeLow\n");
 	gyroEnabled = false;
 	// Reply layout: response header in the first word, result code in the word right after it
 	const auto replyHeader = IPC::responseHeader(0x14, 1, 0);
 	mem.write32(messagePointer, replyHeader);
 	mem.write32(messagePointer + 4, Result::Success);
 }
 void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) {
 	log("HID::GetGyroscopeLowCalibrateParam\n");
 	constexpr s16 unit = 6700; // Approximately from Citra which took it from hardware