Fix shader-interpreter non-IEEE outputs (#459)

* Re-enable non-IEEE shader test
* Fix shader-interpreter RCP/RSQ output

  Handle the `-0.0` special-case
* Fix shader-interpreter MIN/MAX output

  Takes advantage of `std::min`/`std::max`'s properties regarding non-finite operands, where the result depends on which argument position holds the NaN:
  ```
  max(NaN, 2.f) -> NaN
  max(2.f, NaN) -> 2
  min(NaN, 2.f) -> NaN
  min(2.f, NaN) -> 2
  ```
* Fix shader-interpreter FLR indexing bug

  `3 - 1` should be `3 - i`
2025-07-02 13:26:24 +12:00 · 2024-03-11 23:34:02 -07:00 · 2024-03-11 23:34:02 -07:00 · c89fe05b8a
commit c89fe05b8a
parent fe9939689d
2 changed files with 24 additions and 8 deletions
--- a/src/core/PICA/shader_interpreter.cpp
+++ b/src/core/PICA/shader_interpreter.cpp
@@ -223,7 +223,7 @@ void PICAShader::flr(u32 instruction) {
 	u32 componentMask = operandDescriptor & 0xf;
 	for (int i = 0; i < 4; i++) {
 		if (componentMask & (1 << i)) {
-			destVector[3 - i] = f24::fromFloat32(std::floor(srcVector[3 - 1].toFloat32()));
+			destVector[3 - i] = f24::fromFloat32(std::floor(srcVector[3 - i].toFloat32()));
 		}
 	}
 }
@@ -244,8 +244,12 @@ void PICAShader::max(u32 instruction) {
 	u32 componentMask = operandDescriptor & 0xf;
 	for (int i = 0; i < 4; i++) {
 		if (componentMask & (1 << i)) {
-			const auto maximum = srcVec1[3 - i] > srcVec2[3 - i] ? srcVec1[3 - i] : srcVec2[3 - i];
-			destVector[3 - i] = maximum;
+			const float inputA = srcVec1[3 - i].toFloat32();
+			const float inputB = srcVec2[3 - i].toFloat32();
+			// max(NaN, 2.f) -> NaN
+			// max(2.f, NaN) -> 2
+			const auto& maximum = std::isinf(inputB) ? inputB : std::max(inputB, inputA);
+			destVector[3 - i] = f24::fromFloat32(maximum);
 		}
 	}
 }
@@ -266,8 +270,12 @@ void PICAShader::min(u32 instruction) {
 	u32 componentMask = operandDescriptor & 0xf;
 	for (int i = 0; i < 4; i++) {
 		if (componentMask & (1 << i)) {
-			const auto mininum = srcVec1[3 - i] < srcVec2[3 - i] ? srcVec1[3 - i] : srcVec2[3 - i];
-			destVector[3 - i] = mininum;
+			const float inputA = srcVec1[3 - i].toFloat32();
+			const float inputB = srcVec2[3 - i].toFloat32();
+			// min(NaN, 2.f) -> NaN
+			// min(2.f, NaN) -> 2
+			const auto& mininum = std::min(inputB, inputA);
+			destVector[3 - i] = f24::fromFloat32(mininum);
 		}
 	}
 }
@@ -382,7 +390,11 @@ void PICAShader::rcp(u32 instruction) {
 	vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);

 	vec4f& destVector = getDest(dest);
-	f24 res = f24::fromFloat32(1.0f) / srcVec1[0];
+	float input = srcVec1[0].toFloat32();
+	if (input == -0.0f) {
+		input = 0.0f;
+	}
+	const f24 res = f24::fromFloat32(1.0f / input);

 	u32 componentMask = operandDescriptor & 0xf;
 	for (int i = 0; i < 4; i++) {
@@ -402,7 +414,11 @@ void PICAShader::rsq(u32 instruction) {
 	vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);

 	vec4f& destVector = getDest(dest);
-	f24 res = f24::fromFloat32(1.0f / std::sqrt(srcVec1[0].toFloat32()));
+	float input = srcVec1[0].toFloat32();
+	if (input == -0.0f) {
+		input = 0.0f;
+	}
+	const f24 res = f24::fromFloat32(1.0f / std::sqrt(input));

 	u32 componentMask = operandDescriptor & 0xf;
 	for (int i = 0; i < 4; i++) {
--- a/tests/shader.cpp
+++ b/tests/shader.cpp
@@ -113,7 +113,7 @@ SHADER_TEST_CASE("RCP", "[shader][vertex]") {
 		{nihstro::OpCode::Id::END},
 	});

-	// REQUIRE(shader->RunScalar({-0.0f}) == INFINITY); // Violates IEEE
+	REQUIRE(shader->runScalar({-0.0f}) == INFINITY);
 	REQUIRE(shader->runScalar({0.0f}) == INFINITY);
 	REQUIRE(shader->runScalar({INFINITY}) == 0.0f);
 	REQUIRE(std::isnan(shader->runScalar({NAN})));