Merge branch 'master' into metal2

2025-07-08 00:02:58 +12:00 · 2024-09-24 09:22:17 +02:00 · 2024-09-24 09:22:17 +02:00 · 779e30e3e5
commit 779e30e3e5
parent 8830747e90 a8041bce8c
48 changed files with 1691 additions and 242 deletions
--- a/src/core/PICA/gpu.cpp
+++ b/src/core/PICA/gpu.cpp
@ -166,7 +166,10 @@ void GPU::drawArrays() {
 	// Configures the type of primitive and the number of vertex shader outputs
 	const u32 primConfig = regs[PICA::InternalRegs::PrimitiveConfig];
 	const PICA::PrimType primType = static_cast<PICA::PrimType>(Helpers::getBits<8, 2>(primConfig));
-	if (vertexCount > Renderer::vertexBufferSize) Helpers::panic("[PICA] vertexCount > vertexBufferSize");
+	if (vertexCount > Renderer::vertexBufferSize) [[unlikely]] {
+		Helpers::warn("[PICA] vertexCount > vertexBufferSize");
+		return;
+	}

 	if ((primType == PICA::PrimType::TriangleList && vertexCount % 3) || (primType == PICA::PrimType::TriangleStrip && vertexCount < 3) ||
 		(primType == PICA::PrimType::TriangleFan && vertexCount < 3)) {
--- a/src/core/PICA/shader_gen_glsl.cpp
+++ b/src/core/PICA/shader_gen_glsl.cpp
@ -107,6 +107,7 @@ std::string FragmentGenerator::generate(const FragmentConfig& config) {
 	if (api == API::GLES) {
 		ret += R"(
 			#define USING_GLES 1
+			#define fma(a, b, c) ((a) * (b) + (c))

 			precision mediump int;
 			precision mediump float;
@ -502,7 +503,7 @@ void FragmentGenerator::compileLights(std::string& shader, const PICA::FragmentC
 					  "].distanceAttenuationScale + lightSources[" + std::to_string(lightID) + "].distanceAttenuationBias, 0.0, 1.0);\n";

 			shader += "distance_attenuation = lutLookup(" + std::to_string(16 + lightID) +
-					  ", int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
+					  "u, int(clamp(floor(distance_att_delta * 256.0), 0.0, 255.0)));\n";
 		}

 		compileLUTLookup(shader, config, i, spotlightLutIndex);
@ -637,7 +638,7 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme
 	if (absEnabled) {
 		bool twoSidedDiffuse = config.lighting.lights[lightIndex].twoSidedDiffuse;
 		shader += twoSidedDiffuse ? "lut_lookup_delta = abs(lut_lookup_delta);\n" : "lut_lookup_delta = max(lut_lookup_delta, 0.0);\n";
-		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
+		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, int(clamp(floor(lut_lookup_delta * 256.0), 0.0, 255.0)));\n";
 		if (scale != 0) {
 			shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n";
 		}
@ -645,7 +646,7 @@ void FragmentGenerator::compileLUTLookup(std::string& shader, const PICA::Fragme
 		// Range is [-1, 1] so we need to map it to [0, 1]
 		shader += "lut_lookup_index = int(clamp(floor(lut_lookup_delta * 128.0), -128.f, 127.f));\n";
 		shader += "if (lut_lookup_index < 0) lut_lookup_index += 256;\n";
-		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + ", lut_lookup_index);\n";
+		shader += "lut_lookup_result = lutLookup(" + std::to_string(lutIndex) + "u, lut_lookup_index);\n";
 		if (scale != 0) {
 			shader += "lut_lookup_result *= " + std::to_string(scales[scale]) + ";\n";
 		}
--- a/src/core/audio/aac_decoder.cpp
+++ b/src/core/audio/aac_decoder.cpp
@ -0,0 +1,139 @@
+#include "audio/aac_decoder.hpp"
+
+#include <aacdecoder_lib.h>
+
+#include <vector>
+using namespace Audio;
+
+void AAC::Decoder::decode(AAC::Message& response, const AAC::Message& request, AAC::Decoder::PaddrCallback paddrCallback) {  // Decodes the AAC data described by request, copies the PCM samples to the request's output addresses and fills response with the stream info
+	// Copy the command and mode fields of the request to the response
+	response.command = request.command;
+	response.mode = request.mode;
+	response.decodeResponse.size = request.decodeRequest.size;
+
+	// Start with a dummy response (success, 2 channels, 1024 samples, Rate48000). The stream info fields are overwritten whenever a frame decodes successfully
+	response.resultCode = AAC::ResultCode::Success;
+	response.decodeResponse.channelCount = 2;
+	response.decodeResponse.sampleCount = 1024;
+	response.decodeResponse.sampleRate = AAC::SampleRate::Rate48000;
+
+	if (!isInitialized()) {  // Initialize the decoder on first use
+		initialize();
+
+		// If the AAC decoder is still not initialized, warn and return without decoding, leaving the dummy response in place
+		if (!isInitialized()) {
+			Helpers::warn("Failed to initialize AAC decoder");
+			return;
+		}
+	}
+
+	u8* input = paddrCallback(request.decodeRequest.address);  // Resolve the addresses in the request to pointers through paddrCallback
+	const u8* inputEnd = paddrCallback(request.decodeRequest.address + request.decodeRequest.size);  // Only used below to check that the end of the input range resolves too
+	u8* outputLeft = paddrCallback(request.decodeRequest.destAddrLeft);
+	u8* outputRight = nullptr;  // Resolved later, once a frame with more than 1 channel has been decoded
+
+	if (input == nullptr || inputEnd == nullptr || outputLeft == nullptr) {
+		Helpers::warn("Invalid pointers passed to AAC decoder");
+		return;
+	}
+
+	u32 bytesValid = request.decodeRequest.size;  // Passed by pointer to aacDecoder_Fill; the decode loop below runs until it reaches 0
+	u32 bufferSize = request.decodeRequest.size;
+
+	// Each frame is 2048 samples with 2 channels
+	static constexpr usize frameSize = 2048 * 2;
+	std::array<s16, frameSize> frame;  // Receives the output of each aacDecoder_DecodeFrame call
+	std::array<std::vector<s16>, 2> audioStreams;  // Accumulated samples, one vector per output channel
+
+	bool queriedStreamInfo = false;  // NOTE(review): never read anywhere in this function
+
+	while (bytesValid != 0) {
+		if (aacDecoder_Fill(decoderHandle, &input, &bufferSize, &bytesValid) != AAC_DEC_OK) {
+			Helpers::warn("Failed to fill AAC decoder with samples");
+			return;
+		}
+
+		auto decodeResult = aacDecoder_DecodeFrame(decoderHandle, frame.data(), frameSize, 0);
+
+		if (decodeResult == AAC_DEC_TRANSPORT_SYNC_ERROR) {
+			// https://android.googlesource.com/platform/external/aac/+/2ddc922/libAACdec/include/aacdecoder_lib.h#362
+			// According to the above, if we get a sync error, we're not meant to stop decoding, but rather just continue feeding data
+		} else if (decodeResult == AAC_DEC_OK) {
+			auto getSampleRate = [](u32 rate) {  // Maps a numeric sample rate to the matching AAC::SampleRate value; unrecognized rates fall back to Rate48000
+				switch (rate) {
+					case 8000: return AAC::SampleRate::Rate8000;
+					case 11025: return AAC::SampleRate::Rate11025;
+					case 12000: return AAC::SampleRate::Rate12000;
+					case 16000: return AAC::SampleRate::Rate16000;
+					case 22050: return AAC::SampleRate::Rate22050;
+					case 24000: return AAC::SampleRate::Rate24000;
+					case 32000: return AAC::SampleRate::Rate32000;
+					case 44100: return AAC::SampleRate::Rate44100;
+					case 48000:
+					default: return AAC::SampleRate::Rate48000;
+				}
+			};
+
+			auto info = aacDecoder_GetStreamInfo(decoderHandle);  // Queried again after every successfully decoded frame
+			response.decodeResponse.sampleCount = info->frameSize;  // These response fields end up describing the last successfully decoded frame
+			response.decodeResponse.channelCount = info->numChannels;
+			response.decodeResponse.sampleRate = getSampleRate(info->sampleRate);
+
+			int channels = info->numChannels;
+			// Reserve space in our output stream vectors so push_back doesn't do allocations
+			for (int i = 0; i < channels; i++) {
+				audioStreams[i].reserve(audioStreams[i].size() + info->frameSize);
+			}
+
+			// Fetch output pointer for right output channel if we've got > 1 channel
+			if (channels > 1 && outputRight == nullptr) {
+				outputRight = paddrCallback(request.decodeRequest.destAddrRight);
+				// If the right output channel doesn't point to a proper paddr, return
+				if (outputRight == nullptr) {
+					Helpers::warn("Right AAC output channel doesn't point to valid physical address");
+					return;
+				}
+			}
+
+			for (int sample = 0; sample < info->frameSize; sample++) {  // frame is read as interleaved samples; split them into the per-channel streams
+				for (int stream = 0; stream < channels; stream++) {
+					audioStreams[stream].push_back(frame[(sample * channels) + stream]);
+				}
+			}
+		} else {
+			Helpers::warn("Failed to decode AAC frame");  // Any other decoder result aborts decoding; samples gathered so far are not copied to the outputs
+			return;
+		}
+	}
+
+	for (int i = 0; i < 2; i++) {  // Copy each non-empty channel stream to its output pointer (stream 0 -> left, stream 1 -> right)
+		auto& stream = audioStreams[i];
+		u8* pointer = (i == 0) ? outputLeft : outputRight;
+
+		if (!stream.empty() && pointer != nullptr) {
+			std::memcpy(pointer, stream.data(), stream.size() * sizeof(s16));
+		}
+	}
+}
+
+void AAC::Decoder::initialize() {  // Opens a decoder with the TT_MP4_ADTS transport type and caps its output at 2 channels; decoderHandle is left null if either step fails
+	decoderHandle = aacDecoder_Open(TRANSPORT_TYPE::TT_MP4_ADTS, 1);
+
+	if (decoderHandle == nullptr) [[unlikely]] {  // Opening the decoder failed, nothing to clean up
+		return;
+	}
+
+	// Cap output channel count to 2. If that fails, close the decoder again and null the handle
+	if (aacDecoder_SetParam(decoderHandle, AAC_PCM_MAX_OUTPUT_CHANNELS, 2) != AAC_DEC_OK) [[unlikely]] {
+		aacDecoder_Close(decoderHandle);
+		decoderHandle = nullptr;
+		return;
+	}
+}
+
+AAC::Decoder::~Decoder() {  // Closes the decoder handle, if isInitialized() reports that there is one
+	if (isInitialized()) {
+		aacDecoder_Close(decoderHandle);
+		decoderHandle = nullptr;  // Null the handle after closing it
+	}
+}
--- a/src/core/audio/hle_core.cpp
+++ b/src/core/audio/hle_core.cpp
@ -6,6 +6,7 @@
 #include <thread>
 #include <utility>

+#include "audio/aac_decoder.hpp"
 #include "services/dsp.hpp"

 namespace Audio {
@ -23,6 +24,8 @@ namespace Audio {
 		for (int i = 0; i < sources.size(); i++) {
 			sources[i].index = i;
 		}
+
+		aacDecoder.reset(new Audio::AAC::Decoder());
 	}

 	void HLE_DSP::resetAudioPipe() {
@ -584,7 +587,6 @@ namespace Audio {
 		switch (request.command) {
 			case AAC::Command::EncodeDecode:
 				// Dummy response to stop games from hanging
-				// TODO: Fix this when implementing AAC
 				response.resultCode = AAC::ResultCode::Success;
 				response.decodeResponse.channelCount = 2;
 				response.decodeResponse.sampleCount = 1024;
@ -593,6 +595,10 @@ namespace Audio {

 				response.command = request.command;
 				response.mode = request.mode;
+
+				// We've already got an AAC decoder but it's currently disabled until mixing & output is properly implemented
+				// TODO: Uncomment this when the time comes
+				// aacDecoder->decode(response, request, [this](u32 paddr) { return getPointerPhys<u8>(paddr); });
 				break;

 			case AAC::Command::Init:
--- a/src/core/audio/miniaudio_device.cpp
+++ b/src/core/audio/miniaudio_device.cpp
@ -90,16 +90,17 @@ void MiniAudioDevice::init(Samples& samples, bool safe) {
 	deviceConfig.dataCallback = [](ma_device* device, void* out, const void* input, ma_uint32 frameCount) {
 		auto self = reinterpret_cast<MiniAudioDevice*>(device->pUserData);
 		s16* output = reinterpret_cast<ma_int16*>(out);
+		const usize maxSamples = std::min(self->samples->Capacity(), usize(frameCount * channelCount));

 		// Wait until there's enough samples to pop
-		while (self->samples->size() < frameCount * channelCount) {
+		while (self->samples->size() < maxSamples) {
 			// If audio output is disabled from the emulator thread, make sure that this callback will return and not hang
 			if (!self->running) {
 				return;
 			}
 		}

-		self->samples->pop(output, frameCount * channelCount);
+		self->samples->pop(output, maxSamples);
 	};

 	if (ma_device_init(&context, &deviceConfig, &device) != MA_SUCCESS) {
--- a/src/core/renderer_gl/renderer_gl.cpp
+++ b/src/core/renderer_gl/renderer_gl.cpp
@ -49,7 +49,7 @@ void RendererGL::reset() {
 		gl.useProgram(oldProgram);  // Switch to old GL program
 	}

-#ifdef __ANDROID__
+#ifdef USING_GLES
 	fragShaderGen.setTarget(PICA::ShaderGen::API::GLES, PICA::ShaderGen::Language::GLSL);
 #endif
 }
--- a/src/core/services/hid.cpp
+++ b/src/core/services/hid.cpp
@ -35,6 +35,7 @@ void HIDService::reset() {
 	circlePadX = circlePadY = 0;
 	touchScreenX = touchScreenY = 0;
 	roll = pitch = yaw = 0;
+	accelX = accelY = accelZ = 0;
 }

 void HIDService::handleSyncRequest(u32 messagePointer) {
@ -189,6 +190,20 @@ void HIDService::updateInputs(u64 currentTick) {
 			writeSharedMem<u64>(0x108, currentTick);             // Write new tick count
 		}
 		writeSharedMem<u32>(0x118, nextAccelerometerIndex); // Index last updated by the HID module
+		const size_t accelEntryOffset = 0x128 + (nextAccelerometerIndex * 6);  // Offset in the array of 8 accelerometer entries
+
+		// Raw data of current accelerometer entry
+		// TODO: How is the "raw" data actually calculated?
+		s16* accelerometerDataRaw = getSharedMemPointer<s16>(0x120);
+		accelerometerDataRaw[0] = accelX;
+		accelerometerDataRaw[1] = accelY;
+		accelerometerDataRaw[2] = accelZ;
+
+		// Accelerometer entry in entry table
+		s16* accelerometerData = getSharedMemPointer<s16>(accelEntryOffset);
+		accelerometerData[0] = accelX;
+		accelerometerData[1] = accelY;
+		accelerometerData[2] = accelZ;
 		nextAccelerometerIndex = (nextAccelerometerIndex + 1) % 8; // Move to next entry

 		// Next, update gyro state
@ -197,9 +212,10 @@ void HIDService::updateInputs(u64 currentTick) {
 			writeSharedMem<u64>(0x158, currentTick);             // Write new tick count
 		}
 		const size_t gyroEntryOffset = 0x178 + (nextGyroIndex * 6);  // Offset in the array of 8 gyroscope entries
-		writeSharedMem<u16>(gyroEntryOffset, pitch);
-		writeSharedMem<u16>(gyroEntryOffset + 2, yaw);
-		writeSharedMem<u16>(gyroEntryOffset + 4, roll);
+		s16* gyroData = getSharedMemPointer<s16>(gyroEntryOffset);
+		gyroData[0] = pitch;
+		gyroData[1] = yaw;
+		gyroData[2] = roll;

 		// Since gyroscope euler angles are relative, we zero them out here and the frontend will update them again when we receive a new rotation
 		roll = pitch = yaw = 0;