Implement shader-interpreter relative MOVA addressing (#471)

* Add shader uniform-read unit test * Add unit test f24 vector formatter * Add Address Register Offset shader unit test * Implement float-uniform out-of-bound return value In the case that the resulting float-uniform index is greater than the 96 slots that it has, a result of `{1,1,1,1}` is to be returned. * Implement shader relative addressing Fails on the negative unit tests at the moment but passes all of the others. * Fix `MOVA` source register indexing
2025-04-06 06:05:40 +12:00 · 2024-03-22 09:48:03 -07:00 · 2024-03-22 09:48:03 -07:00 · 5284109fd4
commit 5284109fd4
parent 3270cfe602
2 changed files with 118 additions and 13 deletions
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@ -119,9 +119,17 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) {
 		return source;

 	switch (index) {
-		case 0: [[likely]] return u8(source);  // No offset applied
-		case 1: return u8(source + addrRegister[0]);
-		case 2: return u8(source + addrRegister[1]);
+		// No offset applied
+		case 0: [[likely]] return u8(source);
+		// Address register
+		case 1:
+		case 2: {
+			const s32 offset = addrRegister[index - 1];
+			if (offset < -128 || offset > 127) [[unlikely]] {
+				return u8(source);
+			}
+			return u8(source + offset);
+		}
 		case 3: return u8(source + loopCounter);
 	}

@ -130,15 +138,16 @@ u8 PICAShader::getIndexedSource(u32 source, u32 index) {
 }

 PICAShader::vec4f PICAShader::getSource(u32 source) {
-	if (source < 0x10)
+	if (source < 0x10) {
 		return inputs[source];
-	else if (source < 0x20)
+	} else if (source < 0x20) {
 		return tempRegisters[source - 0x10];
-	else if (source <= 0x7f)
-		return floatUniforms[source - 0x20];
-	else {
-		Helpers::warn("[PICA] Unimplemented source value: %X\n", source);
-		return vec4f({f24::zero(), f24::zero(), f24::zero(), f24::zero()});
+	} else {
+		const usize floatIndex = (source - 0x20) & 0x7f;
+		if (floatIndex >= 96) [[unlikely]] {
+			return vec4f({f24::fromFloat32(1.0f), f24::fromFloat32(1.0f), f24::fromFloat32(1.0f), f24::fromFloat32(1.0f)});
+		}
+		return floatUniforms[floatIndex];
 	}
 }

@ -300,10 +309,10 @@ void PICAShader::mov(u32 instruction) {

 void PICAShader::mova(u32 instruction) {
 	const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
-	const u32 src = getBits<12, 7>(instruction);
+	u32 src = getBits<12, 7>(instruction);
 	const u32 idx = getBits<19, 2>(instruction);

-	if (idx) Helpers::panic("[PICA] MOVA: idx != 0");
+	src = getIndexedSource(src, idx);
 	vec4f srcVector = getSourceSwizzled<1>(src, operandDescriptor);

 	u32 componentMask = operandDescriptor & 0xf;
--- a/tests/shader.cpp
+++ b/tests/shader.cpp
@ -14,6 +14,13 @@ static const nihstro::SourceRegister input0 = nihstro::SourceRegister::MakeInput
 static const nihstro::SourceRegister input1 = nihstro::SourceRegister::MakeInput(1);
 static const nihstro::DestRegister output0 = nihstro::DestRegister::MakeOutput(0);

+static const std::array<Floats::f24, 4> vectorOnes = {
+	Floats::f24::fromFloat32(1.0f),
+	Floats::f24::fromFloat32(1.0f),
+	Floats::f24::fromFloat32(1.0f),
+	Floats::f24::fromFloat32(1.0f),
+};
+
 static std::unique_ptr<PICAShader> assembleVertexShader(std::initializer_list<nihstro::InlineAsm> code) {
 	const auto shaderBinary = nihstro::InlineAsm::CompileToRawBinary(code);
 	auto newShader = std::make_unique<PICAShader>(ShaderType::Vertex);
@ -66,7 +73,7 @@ class ShaderInterpreterTest {

 	[[nodiscard]] std::array<std::array<Floats::f24, 4>, 96>& floatUniforms() const { return shader->floatUniforms; }
 	[[nodiscard]] std::array<std::array<u8, 4>, 4>& intUniforms() const { return shader->intUniforms; }
-	[[nodiscard]] u32& boolUniform() const { return shader->boolUniform; }
+	[[nodiscard]] u32& boolUniforms() const { return shader->boolUniform; }

 	static std::unique_ptr<ShaderInterpreterTest> assembleTest(std::initializer_list<nihstro::InlineAsm> code) {
 		return std::make_unique<ShaderInterpreterTest>(code);
@ -92,6 +99,15 @@ class ShaderJITTest final : public ShaderInterpreterTest {
 #define SHADER_TEST_CASE(NAME, TAG) TEMPLATE_TEST_CASE(NAME, TAG, ShaderInterpreterTest)
 #endif

+namespace Catch {
+	template <>
+	struct StringMaker<std::array<Floats::f24, 4>> {
+		static std::string convert(std::array<Floats::f24, 4> value) {
+			return std::format("({}, {}, {}, {})", value[0].toFloat32(), value[1].toFloat32(), value[2].toFloat32(), value[3].toFloat32());
+		}
+	};
+}  // namespace Catch
+
 SHADER_TEST_CASE("ADD", "[shader][vertex]") {
 	const auto shader = TestType::assembleTest({
 		{nihstro::OpCode::Id::ADD, output0, input0, input1},
@ -268,4 +284,84 @@ SHADER_TEST_CASE("FLR", "[shader][vertex]") {
 	REQUIRE(shader->runScalar({-1.5}) == -2.0f);
 	REQUIRE(std::isnan(shader->runScalar({NAN})));
 	REQUIRE(std::isinf(shader->runScalar({INFINITY})));
+}
+
+SHADER_TEST_CASE("Uniform Read", "[shader][vertex][uniform]") {
+	const auto constant0 = nihstro::SourceRegister::MakeFloat(0);
+	auto shader = TestType::assembleTest({
+		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1
+		},
+		{nihstro::OpCode::Id::MOV, output0, "xyzw", constant0, "xyzw", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1},
+		{nihstro::OpCode::Id::END},
+	});
+
+	// Generate float uniforms
+	std::array<std::array<Floats::f24, 4>, 96> floatUniforms = {};
+	for (u32 i = 0; i < 96; ++i) {
+		const float color = (i * 2.0f) / 255.0f;
+		const Floats::f24 color24 = Floats::f24::fromFloat32(color);
+		const std::array<Floats::f24, 4> testValue = {color24, color24, color24, Floats::f24::fromFloat32(1.0f)};
+		shader->floatUniforms()[i] = testValue;
+		floatUniforms[i] = testValue;
+	}
+
+	for (u32 i = 0; i < 96; ++i) {
+		const float index = static_cast<float>(i);
+		// Intentionally use some fractional values to verify float->integer
+		// truncation during address translation
+		const float fractional = (i % 17) / 17.0f;
+
+		REQUIRE(shader->runVector({index + fractional}) == floatUniforms[i]);
+	}
+}
+
+SHADER_TEST_CASE("Address Register Offset", "[video_core][shader][shader_jit]") {
+	const auto constant40 = nihstro::SourceRegister::MakeFloat(40);
+	auto shader = TestType::assembleTest({
+		// mova a0.x, sh_input.x
+		{nihstro::OpCode::Id::MOVA, nihstro::DestRegister{}, "x", input0, "x", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1
+		},
+		// mov sh_output.xyzw, c40[a0.x].xyzw
+		{nihstro::OpCode::Id::MOV, output0, "xyzw", constant40, "xyzw", nihstro::SourceRegister{}, "", nihstro::InlineAsm::RelativeAddress::A1},
+		{nihstro::OpCode::Id::END},
+	});
+
+	// Generate uniforms
+	const bool inverted = true;
+	std::array<std::array<Floats::f24, 4>, 96> floatUniforms = {};
+	for (u8 i = 0; i < 0x80; i++) {
+		// Float uniforms
+		if (i >= 0x00 && i < 0x60) {
+			const u32 base = inverted ? (0x60 - i) : i;
+			const auto color = (base * 2.f) / 255.0f;
+			const auto color24 = Floats::f24::fromFloat32(color);
+			const std::array<Floats::f24, 4> testValue = {color24, color24, color24, Floats::f24::fromFloat32(1.0f)};
+			shader->floatUniforms()[i] = testValue;
+			floatUniforms[i] = testValue;
+		}
+		// Integer uniforms
+		else if (i >= 0x60 && i < 0x64) {
+			const u8 color = static_cast<u8>((i - 0x60) * 0x10);
+			shader->intUniforms()[i - 0x60] = {color, color, color, 255};
+		}
+		// Bool uniforms(bools packed into an integer)
+		else if (i >= 0x70 && i < 0x80) {
+			shader->boolUniforms() |= (i >= 0x78) << (i - 0x70);
+		}
+	}
+
+	REQUIRE(shader->runVector({0.f}) == floatUniforms[40]);
+	REQUIRE(shader->runVector({13.f}) == floatUniforms[53]);
+	REQUIRE(shader->runVector({50.f}) == floatUniforms[90]);
+	REQUIRE(shader->runVector({60.f}) == vectorOnes);
+	REQUIRE(shader->runVector({74.f}) == vectorOnes);
+	REQUIRE(shader->runVector({87.f}) == vectorOnes);
+	REQUIRE(shader->runVector({88.f}) == floatUniforms[0]);
+	REQUIRE(shader->runVector({128.f}) == floatUniforms[40]);
+	REQUIRE(shader->runVector({-40.f}) == floatUniforms[0]);
+	REQUIRE(shader->runVector({-42.f}) == vectorOnes);
+	REQUIRE(shader->runVector({-70.f}) == vectorOnes);
+	REQUIRE(shader->runVector({-73.f}) == floatUniforms[95]);
+	REQUIRE(shader->runVector({-127.f}) == floatUniforms[41]);
+	REQUIRE(shader->runVector({-129.f}) == floatUniforms[40]);
 }