Merge pull request #61 from skylersaleh/lighting_impl

Implement Fragment Lighting (and clipping planes)
2025-07-01 04:46:20 +12:00 · 2023-07-04 20:52:16 +03:00 · 2023-07-04 20:52:16 +03:00 · ee49f89779
commit ee49f89779
parent 14d287dbd7 aa27389473
20 changed files with 582 additions and 98 deletions
--- a/.github/workflows/Linux_Build.yml
+++ b/.github/workflows/Linux_Build.yml
@ -26,7 +26,7 @@ jobs:
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DENABLE_USER_BUILD=ON

    - name: Build
      # Build your program with the given configuration
--- a/.github/workflows/MacOS_Build.yml
+++ b/.github/workflows/MacOS_Build.yml
@ -23,13 +23,10 @@ jobs:
    - name: Fetch submodules
      run: git submodule update --init --recursive

-    - name: Install LLVM # MacOS comes with "AppleClang" instead of regular Clang, and it can't build the project because no proper C++20
-      run: brew install llvm
-
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON

    - name: Build
      # Build your program with the given configuration
--- a/.github/workflows/Windows_Build.yml
+++ b/.github/workflows/Windows_Build.yml
@ -26,7 +26,7 @@ jobs:
    - name: Configure CMake
      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
-      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
+      run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DENABLE_USER_BUILD=ON

    - name: Build
      # Build your program with the given configuration
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)

-if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fbracket-depth=4096")
 endif()

@ -13,8 +13,14 @@ endif()
 project(Alber)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

+if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-nonliteral -Wno-format-security")
+endif() 
+
+option(DISABLE_PANIC_DEV "Make a build with fewer and less intrusive asserts" OFF)
 option(GPU_DEBUG_INFO "Enable additional GPU debugging info" OFF)
 option(ENABLE_LTO "Enable link-time optimization" OFF)
+option(ENABLE_USER_BUILD "Make a user-facing build. These builds have various assertions disabled, LTO, and more" OFF)

 include_directories(${PROJECT_SOURCE_DIR}/include/)
 include_directories(${PROJECT_SOURCE_DIR}/include/kernel)
@ -159,7 +165,7 @@ source_group("Source Files\\Third Party" FILES ${THIRD_PARTY_SOURCE_FILES})
 add_executable(Alber ${SOURCE_FILES} ${FS_SOURCE_FILES} ${CRYPTO_SOURCE_FILES} ${KERNEL_SOURCE_FILES} ${LOADER_SOURCE_FILES} ${SERVICE_SOURCE_FILES}
 ${PICA_SOURCE_FILES} ${RENDERER_GL_SOURCE_FILES} ${THIRD_PARTY_SOURCE_FILES} ${HEADER_FILES})

-if(ENABLE_LTO)
+if(ENABLE_LTO OR ENABLE_USER_BUILD)
  set_target_properties(Alber PROPERTIES INTERPROCEDURAL_OPTIMIZATION TRUE)
 endif()

@ -167,4 +173,12 @@ target_link_libraries(Alber PRIVATE dynarmic SDL2-static glad cryptopp)

 if(GPU_DEBUG_INFO)
  target_compile_definitions(Alber PRIVATE GPU_DEBUG_INFO=1)
-endif()
+endif()
+
+if(ENABLE_USER_BUILD)
+    target_compile_definitions(Alber PRIVATE PANDA3DS_USER_BUILD=1)
+endif()
+
+if(ENABLE_USER_BUILD OR DISABLE_PANIC_DEV)
+    target_compile_definitions(Alber PRIVATE PANDA3DS_LIMITED_PANICS=1)
+endif()
--- a/include/PICA/gpu.hpp
+++ b/include/PICA/gpu.hpp
@ -69,7 +69,18 @@ class GPU {

 	Renderer renderer;
 	PicaVertex getImmediateModeVertex();
-public:
+
+  public:
+	// 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT
+	// Encoded in PICA native format
+	static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256;
+	std::array<uint32_t, LightingLutSize> lightingLUT;
+
+	// Used to prevent uploading the lighting LUT on every draw call
+	// Set to true when the CPU writes to the lighting LUT
+	// Set to false by the renderer when the lighting LUT is uploaded to the GPU
+	bool lightingLUTDirty = false;
+
 	GPU(Memory& mem);
 	void initGraphicsContext() { renderer.initGraphicsContext(); }
 	void getGraphicsContext() { renderer.getGraphicsContext(); }
--- a/include/PICA/regs.hpp
+++ b/include/PICA/regs.hpp
@ -10,6 +10,13 @@ namespace PICA {
 			ViewportHeight = 0x43,
 			ViewportInvh = 0x44,

+			// Clipping plane control
+			ClipEnable = 0x47,
+			ClipData0 = 0x48,
+			ClipData1 = 0x49,
+			ClipData2 = 0x4A,
+			ClipData3 = 0x4B,
+
 			DepthScale = 0x4D,
 			DepthOffset = 0x4E,
 			ShaderOutputCount = 0x4F,
@ -55,6 +62,17 @@ namespace PICA {
 			ColourBufferLoc = 0x11D,
 			FramebufferSize = 0x11E,

+			//LightingRegs
+			LightingLUTIndex =  0x01C5,
+			LightingLUTData0 =  0x01C8,
+			LightingLUTData1 =  0x01C9,
+			LightingLUTData2 =  0x01CA,
+			LightingLUTData3 =  0x01CB,
+			LightingLUTData4 =  0x01CC,
+			LightingLUTData5 =  0x01CD,
+			LightingLUTData6 =  0x01CE,
+			LightingLUTData7 =  0x01CF,
+			
 			// Geometry pipeline registers
 			VertexAttribLoc = 0x200,
 			AttribFormatLow = 0x201,
@ -156,6 +174,34 @@ namespace PICA {
 		};
 	}

+	namespace Lights {
+		enum : u32 {
+			LUT_D0 = 0,
+			LUT_D1,
+			LUT_FR,
+			LUT_RB,
+			LUT_RG,
+			LUT_RR,
+			LUT_SP0 = 0x8,
+			LUT_SP1,
+			LUT_SP2,
+			LUT_SP3,
+			LUT_SP4,
+			LUT_SP5,
+			LUT_SP6,
+			LUT_SP7,
+			LUT_DA0 = 0x10,
+			LUT_DA1,
+			LUT_DA2,
+			LUT_DA3,
+			LUT_DA4,
+			LUT_DA5,
+			LUT_DA6,
+			LUT_DA7,
+			LUT_Count
+		};
+	}
+
 	enum class TextureFmt : u32 {
 		RGBA8 = 0x0,
 		RGB8 = 0x1,
--- a/include/helpers.hpp
+++ b/include/helpers.hpp
@ -30,24 +30,31 @@ using s32 = std::int32_t;
 using s64 = std::int64_t;

 namespace Helpers {
-	[[noreturn]] static void panic(const char* fmt, ...) {
-		std::va_list args;
-		va_start(args, fmt);
+	// Unconditional panic, unlike panicDev which does not panic on user builds
+	template <class... Args>
+	[[noreturn]] static void panic(const char* fmt, Args&&... args) {
 		std::cout << termcolor::on_red << "[FATAL] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
-		va_end(args);

 		exit(1);
 	}
+	
+#ifdef PANDA3DS_LIMITED_PANICS
+	template <class... Args>
+	static void panicDev(const char* fmt, Args&&... args) {}
+#else
+	template <class... Args>
+	[[noreturn]] static void panicDev(const char* fmt, Args&&... args) {
+		panic(fmt, args...);
+	}
+#endif

-	static void warn(const char* fmt, ...) {
-		std::va_list args;
-		va_start(args, fmt);
+	template <class... Args>
+	static void warn(const char* fmt, Args&&... args) {
 		std::cout << termcolor::on_red << "[Warning] ";
-		std::vprintf(fmt, args);
+		std::printf(fmt, args...);
 		std::cout << termcolor::reset << "\n";
-		va_end(args);
 	}

 	static constexpr bool buildingInDebugMode() {
@ -57,6 +64,13 @@ namespace Helpers {
 		return true;
 	}

+	static constexpr bool isUserBuild() {
+#ifdef PANDA3DS_USER_BUILD
+		return true;
+#endif
+		return false;
+	}
+
 	static void debug_printf(const char* fmt, ...) {
 		if constexpr (buildingInDebugMode()) {
 			std::va_list args;
--- a/include/opengl.hpp
+++ b/include/opengl.hpp
@ -1,5 +1,5 @@
 /***************************************************************************
- *   Copyright (C) 2022 PCSX-Redux authors                                 *
+ *   Copyright (C) 2022 PCSX-Redux & Panda3DS authors                      *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
@ -524,6 +524,9 @@ namespace OpenGL {
    static void enableStencil() { glEnable(GL_STENCIL_TEST); }
    static void disableStencil() { glDisable(GL_STENCIL_TEST); }

+    static void enableClipPlane(GLuint index) { glEnable(GL_CLIP_DISTANCE0 + index); }
+	static void disableClipPlane(GLuint index) { glDisable(GL_CLIP_DISTANCE0 + index); }
+
    static void setDepthFunc(DepthFunc func) { glDepthFunc(static_cast<GLenum>(func)); }

    enum Primitives {
--- a/include/renderer_gl/renderer_gl.hpp
+++ b/include/renderer_gl/renderer_gl.hpp
@ -32,6 +32,7 @@ class Renderer {
 	GLint textureEnvScaleLoc = -1;
 	GLint textureEnvUpdateBufferLoc = -1;
 	GLint textureEnvBufferColorLoc = -1;
+	GLint picaRegLoc = -1;

 	// Depth configuration uniform locations
 	GLint depthOffsetLoc = -1;
@ -66,6 +67,7 @@ class Renderer {
 	const std::array<u32, regNum>& regs;

 	OpenGL::Texture screenTexture;
+	GLuint lightLUTTextureArray;
 	OpenGL::Framebuffer screenFramebuffer;

 	OpenGL::Framebuffer getColourFBO();
@ -76,6 +78,7 @@ class Renderer {
 	void bindDepthBuffer();
 	void setupTextureEnvState();
 	void bindTexturesToSlots();
+	void updateLightingLUT();

  public:
 	Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
--- a/include/services/hid.hpp
+++ b/include/services/hid.hpp
@ -63,6 +63,8 @@ class HIDService {
 	MAKE_LOG_FUNCTION(log, hidLogger)

 	// Service commands
+	void disableAccelerometer(u32 messagePointer);
+	void disableGyroscopeLow(u32 messagePointer);
 	void enableAccelerometer(u32 messagePointer);
 	void enableGyroscopeLow(u32 messagePointer);
 	void getGyroscopeLowCalibrateParam(u32 messagePointer);
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@ -21,6 +21,7 @@ void GPU::reset() {
 	shaderUnit.reset();
 	shaderJIT.reset();
 	std::memset(vram, 0, vramSize);
+	lightingLUT.fill(0);

 	totalAttribCount = 0;
 	fixedAttribMask = 0;
--- a/src/core/PICA/regs.cpp
+++ b/src/core/PICA/regs.cpp
@ -24,18 +24,36 @@ void GPU::writeReg(u32 address, u32 value) {
 }

 u32 GPU::readInternalReg(u32 index) {
-	if (index > regNum) {
+	// Read the PICA internal register at `index`.
+	// Reads from LightingLUTData0..LightingLUTData7 do not return the raw register: they fetch the
+	// lighting LUT entry selected by the LightingLUTIndex register and then auto-increment that index.
+	using namespace PICA::InternalRegs;
+
+	// Valid indices are 0..regNum-1 (the register file is handed around as std::array<u32, regNum>),
+	// so index == regNum must be rejected too, otherwise regs[index] below reads one past the end.
+	if (index >= regNum) [[unlikely]] {
 		Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
 		return 0;
 	}

+	else if (index >= LightingLUTData0 && index <= LightingLUTData7) [[unlikely]] {
+		// Named lutRegister rather than `index` so it does not shadow the function parameter
+		const uint32_t lutRegister = regs[LightingLUTIndex];  // Get full LUT index register
+		const uint32_t lutID = getBits<8, 5>(lutRegister);    // Get which LUT we're actually reading from
+		uint32_t lutIndex = getBits<0, 8>(lutRegister);       // And get the index inside the LUT we're reading from
+		uint32_t value = 0xffffffff;                          // Returned unchanged when lutID does not name a LUT
+
+		if (lutID < PICA::Lights::LUT_Count) {
+			value = lightingLUT[lutID * 256 + lutIndex];
+		}
+
+		// Increment the bottom 8 bits of the lighting LUT index register (wraps around within the same LUT)
+		lutIndex += 1;
+		regs[LightingLUTIndex] = (lutRegister & ~0xff) | (lutIndex & 0xff);
+		return value;
+	}
+
 	return regs[index];
 }

 void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 	using namespace PICA::InternalRegs;

-	if (index > regNum) {
+	if (index > regNum) [[unlikely]] {
 		Helpers::panic("Tried to write to invalid GPU register. Index: %X, value: %08X\n", index, value);
 		return;
 	}
@ -91,6 +109,30 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
 			break;
 		}

+		case LightingLUTData0:
+		case LightingLUTData1:
+		case LightingLUTData2:
+		case LightingLUTData3:
+		case LightingLUTData4:
+		case LightingLUTData5:
+		case LightingLUTData6:
+		case LightingLUTData7:{
+			const uint32_t index = regs[LightingLUTIndex];  // Get full LUT index register
+			const uint32_t lutID = getBits<8, 5>(index);    // Get which LUT we're actually writing to
+			uint32_t lutIndex = getBits<0, 8>(index);       // And get the index inside the LUT we're writing to
+
+			if (lutID < PICA::Lights::LUT_Count) {
+				lightingLUT[lutID * 256 + lutIndex] = newValue;
+				lightingLUTDirty = true;
+			}
+
+			// Increment the bottom 8 bits of the lighting LUT index register
+			lutIndex += 1;
+			regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff);
+
+			break;
+		}
+
 		case VertexFloatUniformIndex:
 			shaderUnit.vs.setFloatUniformIndex(value);
 			break;
--- a/src/core/kernel/directory_operations.cpp
+++ b/src/core/kernel/directory_operations.cpp
@ -33,7 +33,7 @@ void Kernel::readDirectory(u32 messagePointer, Handle directory) {
 	const u32 entryCount = mem.read32(messagePointer + 4);
 	const u32 outPointer = mem.read32(messagePointer + 12);
 	logFileIO("Directory::Read (handle = %X, entry count = %d, out pointer = %08X)\n", directory, entryCount, outPointer);
-	Helpers::panic("Unimplemented FsDir::Read");
+	Helpers::panicDev("Unimplemented FsDir::Read");

 	mem.write32(messagePointer + 4, Result::Success);
 	mem.write32(messagePointer + 8, 0);
--- a/src/core/kernel/idle_thread.cpp
+++ b/src/core/kernel/idle_thread.cpp
@ -59,12 +59,12 @@ void Kernel::setupIdleThread() {
 	t.fpscr = FPSCR::ThreadDefault;

 	// Our idle thread should have as low of a priority as possible, because, well, it's an idle thread.
-	// We handle this by giving it a priority of 0xff, which is lower than is actually allowed for user threads
-	// (High priority value = low priority)
-	t.priority = 0xff;
+	// We handle this by giving it a priority of 0x40, which is lower than is actually allowed for user threads
+	// (High priority value = low priority). This is the same priority used in the retail kernel.
+	t.priority = 0x40;
 	t.status = ThreadStatus::Ready;

 	// Add idle thread to the list of thread indices
 	threadIndices.push_back(idleThreadIndex);
 	sortThreads();
-}
+}
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@ -44,24 +44,21 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 	exheaderInfo.offset = info.offset + 0x200;
 	exheaderInfo.size = exheaderSize;
 	exheaderInfo.hashRegionSize = 0;
+	exheaderInfo.encryptionInfo = std::nullopt;

 	exeFS.offset = info.offset + u64(*(u32*)&header[0x1A0]) * mediaUnit;
 	exeFS.size = u64(*(u32*)&header[0x1A4]) * mediaUnit;
 	exeFS.hashRegionSize = u64(*(u32*)&header[0x1A8]) * mediaUnit;
+	exeFS.encryptionInfo = std::nullopt;

 	romFS.offset = info.offset + u64(*(u32*)&header[0x1B0]) * mediaUnit;
 	romFS.size = u64(*(u32*)&header[0x1B4]) * mediaUnit;
 	romFS.hashRegionSize = u64(*(u32*)&header[0x1B8]) * mediaUnit;
+	romFS.encryptionInfo = std::nullopt;

+	// Shows whether we got the primary and secondary keys correctly
+	bool gotCryptoKeys = true;
 	if (encrypted) {
-		if (!aesEngine.haveKeys()) {
-			Helpers::panic(
-				"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
-				"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
-			);
-			return false;
-		}
-
 		Crypto::AESKey primaryKeyY;
 		Crypto::AESKey secondaryKeyY;
 		std::memcpy(primaryKeyY.data(), header, primaryKeyY.size());
@ -69,44 +66,36 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (!seedCrypto) {
 			secondaryKeyY = primaryKeyY;
 		} else {
-			Helpers::panic("Seed crypto is not supported");
-			return false;
+			Helpers::warn("Seed crypto is not supported");
+			gotCryptoKeys = false;
 		}

 		auto primaryResult = getPrimaryKey(aesEngine, primaryKeyY);
-
-		if (!primaryResult.first) {
-			Helpers::panic("getPrimaryKey failed!");
-			return false;
-		}
-
-		Crypto::AESKey primaryKey = primaryResult.second;
-
 		auto secondaryResult = getSecondaryKey(aesEngine, secondaryKeyY);

-		if (!secondaryResult.first) {
-			Helpers::panic("getSecondaryKey failed!");
-			return false;
+		if (!primaryResult.first || !secondaryResult.first) {
+			gotCryptoKeys = false;
+		} else {
+			Crypto::AESKey primaryKey = primaryResult.second;
+			Crypto::AESKey secondaryKey = secondaryResult.second;
+
+			EncryptionInfo encryptionInfoTmp;
+			encryptionInfoTmp.normalKey = primaryKey;
+			encryptionInfoTmp.initialCounter.fill(0);
+
+			for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
+				encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
+			}
+			encryptionInfoTmp.initialCounter[8] = 1;
+			exheaderInfo.encryptionInfo = encryptionInfoTmp;
+
+			encryptionInfoTmp.initialCounter[8] = 2;
+			exeFS.encryptionInfo = encryptionInfoTmp;
+
+			encryptionInfoTmp.normalKey = secondaryKey;
+			encryptionInfoTmp.initialCounter[8] = 3;
+			romFS.encryptionInfo = encryptionInfoTmp;
 		}
-
-		Crypto::AESKey secondaryKey = secondaryResult.second;
-
-		EncryptionInfo encryptionInfoTmp;
-		encryptionInfoTmp.normalKey = primaryKey;
-		encryptionInfoTmp.initialCounter.fill(0);
-
-		for (std::size_t i = 1; i <= sizeof(std::uint64_t) - 1; i++) {
-			encryptionInfoTmp.initialCounter[i] = header[0x108 + sizeof(std::uint64_t) - 1 - i];
-		}
-		encryptionInfoTmp.initialCounter[8] = 1;
-		exheaderInfo.encryptionInfo = encryptionInfoTmp;
-
-		encryptionInfoTmp.initialCounter[8] = 2;
-		exeFS.encryptionInfo = encryptionInfoTmp;
-
-		encryptionInfoTmp.normalKey = secondaryKey;
-		encryptionInfoTmp.initialCounter[8] = 3;
-		romFS.encryptionInfo = encryptionInfoTmp;
 	}

 	if (exheaderSize != 0) {
@ -125,9 +114,28 @@ bool NCCH::loadFromHeader(Crypto::AESEngine &aesEngine, IOFile& file, const FSIn
 		if (u32(programID) == u32(jumpID) && encrypted) {
 			printf("NCSD is supposedly ecrypted but not actually encrypted\n");
 			encrypted = false;
+
+			// Cartridge is not actually encrypted, set all of our encryption info structures to nullopt
+			exheaderInfo.encryptionInfo = std::nullopt;
+			romFS.encryptionInfo = std::nullopt;
+			exeFS.encryptionInfo = std::nullopt;
 		}
+
 		// If it's truly encrypted, we need to read section again.
 		if (encrypted) {
+			if (!aesEngine.haveKeys()) {
+				Helpers::panic(
+					"Loaded an encrypted ROM but AES keys don't seem to have been provided correctly! Navigate to the emulator's\n"
+					"app data folder and make sure you have a sysdata directory with a file called aes_keys.txt which contains your keys!"
+				);
+				return false;
+			}
+
+			if (!gotCryptoKeys) {
+				Helpers::panic("ROM is encrypted but it seems we couldn't get either the primary or the secondary key");
+				return false;
+			}
+
 			auto [success, bytes] = readFromFile(file, exheaderInfo, &exheader[0], 0, exheaderSize);
 			if (!success || bytes != exheaderSize) {
 				printf("Failed to read Extended NCCH header\n");
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@ -3,6 +3,7 @@
 #include "resource_limits.hpp"
 #include <cassert>
 #include <chrono> // For time since epoch
+#include <ctime>

 using namespace KernelMemoryTypes;

@ -424,9 +425,20 @@ void Memory::mirrorMapping(u32 destAddress, u32 sourceAddress, u32 size) {
 u64 Memory::timeSince3DSEpoch() {
 	using namespace std::chrono;

-	// ms since Jan 1 1970
-	milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
-	// ms between Jan 1 1900 and Jan 1 1970 (2208988800 seconds elapsed between the two)
-	constexpr u64 offset = 2208988800ull * 1000;
-	return ms.count() + offset;
+	std::time_t rawTime = std::time(nullptr); // Get current UTC time
+	auto localTime = std::localtime(&rawTime); // Convert to local time
+
+	bool daylightSavings = localTime->tm_isdst > 0; // Get if time includes DST
+	localTime = std::gmtime(&rawTime);
+
+	// Use gmtime + mktime to calculate difference between local time and UTC
+	auto timezoneDifference = rawTime - std::mktime(localTime);
+	if (daylightSavings) {
+		timezoneDifference += 60ull * 60ull; // Add 1 hour (60 seconds * 60 minutes)
+	}
+
+	// seconds between Jan 1 1900 and Jan 1 1970
+	constexpr u64 offset = 2208988800ull;
+	milliseconds ms = duration_cast<milliseconds>(seconds(rawTime + timezoneDifference + offset));
+	return ms.count();
 }
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@ -5,29 +5,42 @@

 using namespace Floats;
 using namespace Helpers;
-
-// This is all hacked up to display our first triangle
+using namespace PICA;

 const char* vertexShader = R"(
 	#version 410 core
 	
-	layout (location = 0) in vec4 a_coords;
-	layout (location = 1) in vec4 a_vertexColour;
-	layout (location = 2) in vec2 a_texcoord0;
-	layout (location = 3) in vec2 a_texcoord1;
-	layout (location = 4) in float a_texcoord0_w;
-	layout (location = 5) in vec2 a_texcoord2;
+	layout (location = 0) in vec4  a_coords;
+	layout (location = 1) in vec4  a_quaternion;
+	layout (location = 2) in vec4  a_vertexColour;
+	layout (location = 3) in vec2  a_texcoord0;
+	layout (location = 4) in vec2  a_texcoord1;
+	layout (location = 5) in float a_texcoord0_w;
+	layout (location = 6) in vec3  a_view;
+	layout (location = 7) in vec2  a_texcoord2;

+	out vec3 v_normal;
+	out vec3 v_tangent;
+	out vec3 v_bitangent;
 	out vec4 v_colour;
 	out vec3 v_texcoord0;
 	out vec2 v_texcoord1;
+	out vec3 v_view;
 	out vec2 v_texcoord2;
 	flat out vec4 v_textureEnvColor[6];
 	flat out vec4 v_textureEnvBufferColor;

+	out float gl_ClipDistance[2];
+
 	// TEV uniforms
 	uniform uint u_textureEnvColor[6];
 	uniform uint u_textureEnvBufferColor;
+	uniform uint u_picaRegs[0x200 - 0x47];
+
+	// Helper so that the implementation of u_pica_regs can be changed later
+	uint readPicaReg(uint reg_addr){
+		return u_picaRegs[reg_addr - 0x47];
+	}

 	vec4 abgr8888ToVec4(uint abgr) {
 		const float scale = 1.0 / 255.0;
@ -40,6 +53,31 @@ const char* vertexShader = R"(
 		);
 	}

+	vec3 rotateVec3ByQuaternion(vec3 v, vec4 q){
+		vec3 u = q.xyz;
+		float s = q.w;
+		return 2.0 * dot(u, v) * u + (s * s - dot(u, u))* v  + 2.0 * s * cross(u, v);
+	}
+
+	// Convert an arbitrary-width floating point literal to an f32
+	float decodeFP(uint hex, uint E, uint M){
+		uint width = M + E + 1u;
+		uint bias = 128u - (1u << (E - 1u));
+		uint exponent = (hex >> M) & ((1u << E) - 1u);
+		uint mantissa = hex & ((1u << M) - 1u);
+		uint sign = (hex >> (E + M)) << 31u;
+
+		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
+			if (exponent == (1u << E) - 1u) exponent = 255u;
+			else exponent += bias;
+			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
+		} else {
+			hex = sign;
+		}
+
+        return uintBitsToFloat(hex);
+	}
+
 	void main() {
 		gl_Position = a_coords;
 		v_colour = a_vertexColour;
@ -48,21 +86,45 @@ const char* vertexShader = R"(
 		v_texcoord0 = vec3(a_texcoord0.x, 1.0 - a_texcoord0.y, a_texcoord0_w);
 		v_texcoord1 = vec2(a_texcoord1.x, 1.0 - a_texcoord1.y);
 		v_texcoord2 = vec2(a_texcoord2.x, 1.0 - a_texcoord2.y);
+		v_view = a_view; 
+
+		v_normal    = normalize(rotateVec3ByQuaternion(vec3(0.0, 0.0, 1.0), a_quaternion));
+		v_tangent   = normalize(rotateVec3ByQuaternion(vec3(1.0, 0.0, 0.0), a_quaternion));
+		v_bitangent = normalize(rotateVec3ByQuaternion(vec3(0.0, 1.0, 0.0), a_quaternion));

 		for (int i = 0; i < 6; i++) {
 			v_textureEnvColor[i] = abgr8888ToVec4(u_textureEnvColor[i]);
 		}

 		v_textureEnvBufferColor = abgr8888ToVec4(u_textureEnvBufferColor);
+
+		// Parse clipping plane registers
+		// The plane registers describe a clipping plane in the form of Ax + By + Cz + D = 0 
+		// With n = (A, B, C) being the normal vector and D being the origin point distance
+		// Therefore, for the second clipping plane, we can just pass the dot product of the clip vector and the input coordinates to gl_ClipDistance[1]
+		vec4 clipData = vec4(
+			decodeFP(readPicaReg(0x48) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x49) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x4A) & 0xffffffu, 7, 16),
+			decodeFP(readPicaReg(0x4B) & 0xffffffu, 7, 16)
+		);
+
+		// There's also another, always-on clipping plane based on vertex z
+		gl_ClipDistance[0] = -a_coords.z;
+		gl_ClipDistance[1] = dot(clipData, a_coords);
 	}
 )";

 const char* fragmentShader = R"(
 	#version 410 core
 	
+	in vec3 v_tangent;
+	in vec3 v_normal;
+	in vec3 v_bitangent;
 	in vec4 v_colour;
 	in vec3 v_texcoord0;
 	in vec2 v_texcoord1;
+	in vec3 v_view;
 	in vec2 v_texcoord2;
 	flat in vec4 v_textureEnvColor[6];
 	flat in vec4 v_textureEnvBufferColor;
@ -87,6 +149,14 @@ const char* fragmentShader = R"(
 	uniform sampler2D u_tex0;
 	uniform sampler2D u_tex1;
 	uniform sampler2D u_tex2;
+	uniform sampler1DArray u_tex_lighting_lut;
+
+	uniform uint u_picaRegs[0x200 - 0x47];
+
+	// Helper so that the implementation of u_pica_regs can be changed later
+	uint readPicaReg(uint reg_addr){
+		return u_picaRegs[reg_addr - 0x47];
+	}

 	vec4 tevSources[16];
 	vec4 tevNextPreviousBuffer;
@ -190,9 +260,196 @@ const char* fragmentShader = R"(
 		return result;
 	}

+	#define D0_LUT 0u
+	#define D1_LUT 1u
+	#define SP_LUT 2u
+	#define FR_LUT 3u
+	#define RB_LUT 4u
+	#define RG_LUT 5u
+	#define RR_LUT 6u
+
+	float lutLookup(uint lut, uint light, float value){
+		if (lut >= FR_LUT && lut <= RR_LUT)
+			lut -= 1;
+		if (lut==SP_LUT)
+			lut = light + 8;
+		return texture(u_tex_lighting_lut, vec2(value, lut)).r; 
+	}
+
+	vec3 regToColor(uint reg) {
+		// Normalization scale to convert from [0...255] to [0.0...1.0]
+		const float scale = 1.0 / 255.0;
+
+		return scale * vec3(
+			float(bitfieldExtract(reg, 20, 8)),
+			float(bitfieldExtract(reg, 10, 8)),
+			float(bitfieldExtract(reg, 00, 8))
+		);
+	}
+
+	// Convert an arbitrary-width floating point literal to an f32
+	float decodeFP(uint hex, uint E, uint M){
+		uint width = M + E + 1u;
+		uint bias = 128u - (1u << (E - 1u));
+		uint exponent = (hex >> M) & ((1u << E) - 1u);
+		uint mantissa = hex & ((1u << M) - 1u);
+		uint sign = (hex >> (E + M)) << 31u;
+
+		if ((hex & ((1u << (width - 1u)) - 1u)) != 0) {
+			if (exponent == (1u << E) - 1u) exponent = 255u;
+			else exponent += bias;
+			hex = sign | (mantissa << (23u - M)) | (exponent << 23u);
+		} else {
+			hex = sign;
+		}
+
+        return uintBitsToFloat(hex);
+	}
+
+	// Implements the following algorthm: https://mathb.in/26766
 	void calcLighting(out vec4 primary_color, out vec4 secondary_color){
-		primary_color = vec4(vec3(0.5) ,1.0);
-		secondary_color = vec4(vec3(0.5) ,1.0);
+		// Quaternions describe a transformation from surface-local space to eye space.
+		// In surface-local space, by definition (and up to permutation) the normal vector is (0,0,1),
+		// the tangent vector is (1,0,0), and the bitangent vector is (0,1,0).
+		vec3 normal    = normalize(v_normal   );
+		vec3 tangent   = normalize(v_tangent  );
+		vec3 bitangent = normalize(v_bitangent);
+		vec3 view = normalize(v_view);
+
+		uint GPUREG_LIGHTING_ENABLE  = readPicaReg(0x008F);
+		if (bitfieldExtract(GPUREG_LIGHTING_ENABLE, 0, 1) == 0){
+			primary_color = secondary_color = vec4(1.0);
+			return;
+		}
+
+		uint GPUREG_LIGHTING_AMBIENT = readPicaReg(0x01C0);
+		uint GPUREG_LIGHTING_NUM_LIGHTS = (readPicaReg(0x01C2) & 0x7u) +1;
+		uint GPUREG_LIGHTING_LIGHT_PERMUTATION = readPicaReg(0x01D9);
+
+		primary_color   = vec4(vec3(0.0),1.0);
+		secondary_color = vec4(vec3(0.0),1.0);
+
+		primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT);
+
+		uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0);
+		uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1);
+		uint GPUREG_LIGHTING_CONFIG0 = readPicaReg(0x01C3);
+		uint GPUREG_LIGHTING_CONFIG1 = readPicaReg(0x01C4);
+		uint GPUREG_LIGHTING_LUTINPUT_SCALE =  readPicaReg(0x01D2);
+		float d[7];
+
+		bool error_unimpl = false;
+
+		for (uint i = 0; i < GPUREG_LIGHTING_NUM_LIGHTS; i++){
+			uint light_id = bitfieldExtract(GPUREG_LIGHTING_LIGHT_PERMUTATION,int(i*3),3);
+		
+			uint GPUREG_LIGHTi_SPECULAR0 = readPicaReg(0x0140 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_SPECULAR1 = readPicaReg(0x0141 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_DIFFUSE = readPicaReg(0x0142 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_AMBIENT = readPicaReg(0x0143 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_VECTOR_LOW = readPicaReg(0x0144 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_VECTOR_HIGH= readPicaReg(0x0145 + 0x10 * light_id);
+			uint GPUREG_LIGHTi_CONFIG = readPicaReg(0x0149 + 0x10 * light_id);
+
+			vec3 light_vector = normalize(vec3(
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 0, 16), 5, 10),
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_LOW, 16, 16), 5, 10),
+				decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
+			));
+
+			// Positional Light
+			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0)
+				error_unimpl = true;
+
+			vec3 half_vector = normalize(normalize(light_vector) + view);
+
+			for(int c = 0; c < 7; c++){
+				if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0){
+					uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE, c * 4, 3);
+					float scale = float(1u << scale_id);
+					if (scale_id >= 6u)
+						scale/=256.0;
+
+					uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT, c * 4, 3);
+					if (input_id == 0u) d[c] = dot(normal,half_vector);
+					else if (input_id == 1u) d[c] = dot(view,half_vector);
+					else if (input_id == 2u) d[c] = dot(normal,view);
+					else if (input_id == 3u) d[c] = dot(light_vector,normal);
+					else if (input_id == 4u){
+						uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146 + 0x10 * light_id);
+						uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147 + 0x10 * light_id);
+						vec3 spot_light_vector = normalize(vec3(
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 0, 16), 1, 11),
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW, 16, 16), 1, 11),
+							decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH, 0, 16), 1, 11)
+						));
+						d[c] = dot(-light_vector, spot_light_vector); // -L dot P (aka Spotlight aka SP);
+					} else if (input_id == 5u) {
+						d[c] = 1.0; // TODO: cos <greek symbol> (aka CP);
+						error_unimpl = true;
+					} else {
+						d[c] = 1.0;
+					}
+
+					d[c] = lutLookup(c, light_id, d[c] * 0.5 + 0.5) * scale;
+					if (bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS, 2 * c, 1) != 0u) 
+						d[c] = abs(d[c]);
+				} else {
+					d[c] = 1.0;
+				}
+			}
+			
+			uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
+			if (lookup_config == 0) {
+				d[D1_LUT] = 0.0;
+				d[FR_LUT] = 0.0;
+				d[RG_LUT]= d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 1) {
+				d[D0_LUT] = 0.0;
+				d[D1_LUT] = 0.0;
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 2) {
+				d[FR_LUT] = 0.0;
+				d[SP_LUT] = 0.0;
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			} else if(lookup_config == 3) {
+				d[SP_LUT] = 0.0;
+				d[RG_LUT]= d[RB_LUT] = d[RR_LUT] = 1.0;
+			} else if (lookup_config == 4) {
+				d[FR_LUT] = 0.0;
+			} else if (lookup_config == 5) {
+				d[D1_LUT] = 0.0;
+			} else if (lookup_config == 6) {
+				d[RG_LUT] = d[RB_LUT] = d[RR_LUT];
+			}
+
+			float distance_factor = 1.0; // a
+			float indirect_factor = 1.0; // fi
+			float shadow_factor = 1.0;   // o
+
+			float NdotL = dot(normal, light_vector); //Li dot N
+
+			// Two sided diffuse
+			if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 1, 1) == 0) NdotL = max(0.0, NdotL);
+			else NdotL = abs(NdotL);
+
+			float light_factor =  distance_factor*d[SP_LUT]*indirect_factor*shadow_factor;
+
+			primary_color.rgb   += light_factor * (regToColor(GPUREG_LIGHTi_AMBIENT) + regToColor(GPUREG_LIGHTi_DIFFUSE)*NdotL);
+			secondary_color.rgb += light_factor * (
+									 regToColor(GPUREG_LIGHTi_SPECULAR0) * d[D0_LUT] +
+									 regToColor(GPUREG_LIGHTi_SPECULAR1) * d[D1_LUT] * vec3(d[RR_LUT], d[RG_LUT], d[RB_LUT])
+									);
+		}	
+		uint fresnel_output1 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 2, 1);
+		uint fresnel_output2 = bitfieldExtract(GPUREG_LIGHTING_CONFIG0, 3, 1);
+
+		if (fresnel_output1 == 1u) primary_color.a = d[FR_LUT];
+		if (fresnel_output2 == 1u) secondary_color.a = d[FR_LUT];
+
+		if (error_unimpl) {
+			secondary_color = primary_color = vec4(1.0,0.,1.0,1.0);
+		}
 	}

 	void main() {
@ -232,6 +489,8 @@ const char* fragmentShader = R"(
 		if (tevUnimplementedSourceFlag) {
 			 // fragColour = vec4(1.0, 0.0, 1.0, 1.0);
 		}
+		// fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
+

 		// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
 		// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
@ -371,11 +630,13 @@ void Renderer::initGraphicsContext() {
 	depthScaleLoc = OpenGL::uniformLocation(triangleProgram, "u_depthScale");
 	depthOffsetLoc = OpenGL::uniformLocation(triangleProgram, "u_depthOffset");
 	depthmapEnableLoc = OpenGL::uniformLocation(triangleProgram, "u_depthmapEnable");
+	picaRegLoc = OpenGL::uniformLocation(triangleProgram, "u_picaRegs");

-	// Init sampler objects
+	// Init sampler objects. Texture 0 goes in texture unit 0, texture 1 in TU 1, texture 2 in TU 2, and the light maps go in TU 3
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
 	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
+	glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);

 	OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
 	OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
@ -392,21 +653,27 @@ void Renderer::initGraphicsContext() {
 	// Position (x, y, z, w) attributes
 	vao.setAttributeFloat<float>(0, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.positions));
 	vao.enableAttribute(0);
-	// Colour attribute
-	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
+	// Quaternion attribute
+	vao.setAttributeFloat<float>(1, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.quaternion));
 	vao.enableAttribute(1);
-	// UV 0 attribute
-	vao.setAttributeFloat<float>(2, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
+	// Colour attribute
+	vao.setAttributeFloat<float>(2, 4, sizeof(PicaVertex), offsetof(PicaVertex, s.colour));
 	vao.enableAttribute(2);
-	// UV 1 attribute
-	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
+	// UV 0 attribute
+	vao.setAttributeFloat<float>(3, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0));
 	vao.enableAttribute(3);
-	// UV 0 W-component attribute
-	vao.setAttributeFloat<float>(4, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
+	// UV 1 attribute
+	vao.setAttributeFloat<float>(4, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord1));
 	vao.enableAttribute(4);
-	// UV 2 attribute
-	vao.setAttributeFloat<float>(5, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
+	// UV 0 W-component attribute
+	vao.setAttributeFloat<float>(5, 1, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord0_w));
 	vao.enableAttribute(5);
+	// View
+	vao.setAttributeFloat<float>(6, 3, sizeof(PicaVertex), offsetof(PicaVertex, s.view));
+	vao.enableAttribute(6);
+	// UV 2 attribute
+	vao.setAttributeFloat<float>(7, 2, sizeof(PicaVertex), offsetof(PicaVertex, s.texcoord2));
+	vao.enableAttribute(7);

 	dummyVBO.create();
 	dummyVAO.create();
@ -414,6 +681,8 @@ void Renderer::initGraphicsContext() {
 	// Create texture and framebuffer for the 3DS screen
 	const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
 	const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
+	
+	glGenTextures(1,&lightLUTTextureArray);

 	auto prevTexture = OpenGL::getTex2D();
 	screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
@ -543,6 +812,8 @@ void Renderer::bindTexturesToSlots() {
 		tex.bind();
 	}

+	glActiveTexture(GL_TEXTURE0 + 3);
+	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
 	glActiveTexture(GL_TEXTURE0);

 	// Update the texture unit configuration uniform if it changed
@ -552,6 +823,24 @@ void Renderer::bindTexturesToSlots() {
 		glUniform1ui(texUnitConfigLoc, texUnitConfig);
 	}
 }
+// Converts the GPU's lighting LUT entries to 16-bit values and uploads them to the
+// 1D array texture used for lighting lookups (bound on texture unit 3), then clears
+// the GPU's "LUT dirty" flag. Texture unit 0 is made active again before returning.
+void Renderer::updateLightingLUT() {
+	constexpr int entryMask = (1 << 12) - 1;  // Only the low 12 bits of every entry are used
+	std::array<u16, GPU::LightingLutSize> scaledLut;
+
+	const std::size_t entryCount = gpu.lightingLUT.size();
+	for (std::size_t index = 0; index < entryCount; ++index) {
+		// Rescale from the 12-bit range [0, 4095] to the 16-bit range [0, 65535]
+		const uint64_t masked = gpu.lightingLUT[index] & entryMask;
+		scaledLut[index] = masked * 65535 / 4095;
+	}
+
+	// Upload the converted table: 256 texels per layer, one layer per LUT
+	glActiveTexture(GL_TEXTURE0 + 3);
+	glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray);
+	glTexImage2D(
+		GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, scaledLut.data()
+	);
+
+	// Linear filtering, clamped at the table edges
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+	glActiveTexture(GL_TEXTURE0);
+	gpu.lightingLUTDirty = false;
+}

 void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
 	// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
@ -576,6 +865,11 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 		glUniform1ui(alphaControlLoc, alphaControl);
 	}

+	// Clipping plane 0 is always enabled. Plane 1 follows bit 0 of the ClipEnable register.
+	// It has to be switched off explicitly when the bit is clear: the GL enable state persists
+	// across draws, so a plane enabled by an earlier draw would otherwise keep clipping this one.
+	OpenGL::enableClipPlane(0);
+	if (regs[PICA::InternalRegs::ClipEnable] & 1) {
+		OpenGL::enableClipPlane(1);
+	} else {
+		OpenGL::disableClipPlane(1);
+	}
+
 	setupBlending();
 	OpenGL::Framebuffer poop = getColourFBO();
 	poop.bind(OpenGL::DrawAndReadFramebuffer);
@ -614,6 +908,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 	setupTextureEnvState();
 	bindTexturesToSlots();

+	// Upload PICA registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47),
+	// the texturing registers and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates
+	glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]);
+
+	if (gpu.lightingLUTDirty) {
+		updateLightingLUT();
+	}
+
 	// TODO: Actually use this
 	float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;
 	float viewportHeight = f24::fromRaw(regs[PICA::InternalRegs::ViewportHeight] & 0xffffff).toFloat32() * 2.0;
@ -644,7 +946,6 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
 constexpr u32 topScreenBuffer = 0x1f000000;
 constexpr u32 bottomScreenBuffer = 0x1f05dc00;

-// Quick hack to display top screen for now
 void Renderer::display() {
 	OpenGL::disableScissor();

@ -700,7 +1001,9 @@ void Renderer::bindDepthBuffer() {
 		tex = depthBufferCache.add(sampleBuffer).texture.m_handle;
 	}

-	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) Helpers::panic("TODO: Should we remove stencil attachment?");
+	if (PICA::DepthFmt::Depth24Stencil8 != depthBufferFormat) {
+		Helpers::panicDev("TODO: Should we remove stencil attachment?");
+	}
 	auto attachment = depthBufferFormat == PICA::DepthFmt::Depth24Stencil8 ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
 	glFramebufferTexture2D(GL_FRAMEBUFFER, attachment, GL_TEXTURE_2D, tex, 0);
 }
@ -738,6 +1041,8 @@ void Renderer::displayTransfer(u32 inputAddr, u32 outputAddr, u32 inputSize, u32
 	OpenGL::disableBlend();
 	OpenGL::disableDepth();
 	OpenGL::disableScissor();
+	OpenGL::disableClipPlane(0);
+	OpenGL::disableClipPlane(1);
 	displayProgram.use();

 	// Hack: Detect whether we are writing to the top or bottom screen by checking output gap and drawing to the proper part of the output texture
--- a/src/core/services/apt.cpp
+++ b/src/core/services/apt.cpp
@ -81,7 +81,10 @@ void APTService::handleSyncRequest(u32 messagePointer) {
 		case APTCommands::SetApplicationCpuTimeLimit: setApplicationCpuTimeLimit(messagePointer); break;
 		case APTCommands::SetScreencapPostPermission: setScreencapPostPermission(messagePointer); break;
 		case APTCommands::TheSmashBrosFunction: theSmashBrosFunction(messagePointer); break;
-		default: Helpers::panic("APT service requested. Command: %08X\n", command);
+		default:
+			Helpers::panicDev("APT service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }

--- a/src/core/services/cecd.cpp
+++ b/src/core/services/cecd.cpp
@ -16,7 +16,10 @@ void CECDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
 		case CECDCommands::GetInfoEventHandle: getInfoEventHandle(messagePointer); break;
-		default: Helpers::panic("CECD service requested. Command: %08X\n", command);
+		default:
+			Helpers::panicDev("CECD service requested. Command: %08X\n", command);
+			mem.write32(messagePointer + 4, Result::Success);
+			break;
 	}
 }

--- a/src/core/services/hid.cpp
+++ b/src/core/services/hid.cpp
@ -7,7 +7,9 @@ namespace HIDCommands {
 	enum : u32 {
 		GetIPCHandles = 0x000A0000,
 		EnableAccelerometer = 0x00110000,
+		DisableAccelerometer = 0x00120000,  // Dispatched to HIDService::disableAccelerometer
 		EnableGyroscopeLow = 0x00130000,
+		DisableGyroscopeLow = 0x00140000,  // Dispatched to HIDService::disableGyroscopeLow
 		GetGyroscopeLowRawToDpsCoefficient = 0x00150000,
 		GetGyroscopeLowCalibrateParam = 0x00160000
 	};
@ -36,6 +38,8 @@ void HIDService::reset() {
 void HIDService::handleSyncRequest(u32 messagePointer) {
 	const u32 command = mem.read32(messagePointer);
 	switch (command) {
+		case HIDCommands::DisableAccelerometer: disableAccelerometer(messagePointer); break;
+		case HIDCommands::DisableGyroscopeLow: disableGyroscopeLow(messagePointer); break;
 		case HIDCommands::EnableAccelerometer: enableAccelerometer(messagePointer); break;
 		case HIDCommands::EnableGyroscopeLow: enableGyroscopeLow(messagePointer); break;
 		case HIDCommands::GetGyroscopeLowCalibrateParam: getGyroscopeLowCalibrateParam(messagePointer); break;
@ -53,6 +57,14 @@ void HIDService::enableAccelerometer(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }

+// Handler for HIDCommands::DisableAccelerometer: clears the accelerometer-enabled flag,
+// then writes the response header followed by a success result code to the message buffer.
+void HIDService::disableAccelerometer(u32 messagePointer) {
+	log("HID::DisableAccelerometer\n");
+	accelerometerEnabled = false;
+
+	const auto header = IPC::responseHeader(0x12, 1, 0);
+	const u32 resultPointer = messagePointer + 4;  // Result code goes in the word after the header
+
+	mem.write32(messagePointer, header);
+	mem.write32(resultPointer, Result::Success);
+}
+
 void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	log("HID::EnableGyroscopeLow\n");
 	gyroEnabled = true;
@ -61,6 +73,14 @@ void HIDService::enableGyroscopeLow(u32 messagePointer) {
 	mem.write32(messagePointer + 4, Result::Success);
 }

+// Handler for HIDCommands::DisableGyroscopeLow: clears the gyroscope-enabled flag,
+// then writes the response header followed by a success result code to the message buffer.
+void HIDService::disableGyroscopeLow(u32 messagePointer) {
+	log("HID::DisableGyroscopeLow\n");
+	gyroEnabled = false;
+
+	const auto header = IPC::responseHeader(0x14, 1, 0);
+	const u32 resultPointer = messagePointer + 4;  // Result code goes in the word after the header
+
+	mem.write32(messagePointer, header);
+	mem.write32(resultPointer, Result::Success);
+}
+
 void HIDService::getGyroscopeLowCalibrateParam(u32 messagePointer) {
 	log("HID::GetGyroscopeLowCalibrateParam\n");
 	constexpr s16 unit = 6700; // Approximately from Citra which took it from hardware