mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-06 22:25:41 +12:00
Fix shader-interpreter non-IEEE outputs (#459)
* Re-enable non-IEEE shader test
* Fix shader-interpreter RCP/RSQ output: handle the `-0.0` special case
* Fix shader-interpreter MIN/MAX output: takes advantage of min/max's properties regarding non-finite values to return NaN depending on the input position:
  ```
  max(NaN, 2.f) -> NaN
  max(2.f, NaN) -> 2
  min(NaN, 2.f) -> NaN
  min(2.f, NaN) -> 2
  ```
* Fix shader-interpreter FLR indexing bug: `3 - 1` should be `3 - i`
This commit is contained in:
parent
fe9939689d
commit
c89fe05b8a
2 changed files with 24 additions and 8 deletions
|
@ -223,7 +223,7 @@ void PICAShader::flr(u32 instruction) {
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (componentMask & (1 << i)) {
|
if (componentMask & (1 << i)) {
|
||||||
destVector[3 - i] = f24::fromFloat32(std::floor(srcVector[3 - 1].toFloat32()));
|
destVector[3 - i] = f24::fromFloat32(std::floor(srcVector[3 - i].toFloat32()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -244,8 +244,12 @@ void PICAShader::max(u32 instruction) {
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (componentMask & (1 << i)) {
|
if (componentMask & (1 << i)) {
|
||||||
const auto maximum = srcVec1[3 - i] > srcVec2[3 - i] ? srcVec1[3 - i] : srcVec2[3 - i];
|
const float inputA = srcVec1[3 - i].toFloat32();
|
||||||
destVector[3 - i] = maximum;
|
const float inputB = srcVec2[3 - i].toFloat32();
|
||||||
|
// max(NaN, 2.f) -> NaN
|
||||||
|
// max(2.f, NaN) -> 2
|
||||||
|
const auto& maximum = std::isinf(inputB) ? inputB : std::max(inputB, inputA);
|
||||||
|
destVector[3 - i] = f24::fromFloat32(maximum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -266,8 +270,12 @@ void PICAShader::min(u32 instruction) {
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
if (componentMask & (1 << i)) {
|
if (componentMask & (1 << i)) {
|
||||||
const auto mininum = srcVec1[3 - i] < srcVec2[3 - i] ? srcVec1[3 - i] : srcVec2[3 - i];
|
const float inputA = srcVec1[3 - i].toFloat32();
|
||||||
destVector[3 - i] = mininum;
|
const float inputB = srcVec2[3 - i].toFloat32();
|
||||||
|
// min(NaN, 2.f) -> NaN
|
||||||
|
// min(2.f, NaN) -> 2
|
||||||
|
const auto& mininum = std::min(inputB, inputA);
|
||||||
|
destVector[3 - i] = f24::fromFloat32(mininum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -382,7 +390,11 @@ void PICAShader::rcp(u32 instruction) {
|
||||||
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
||||||
|
|
||||||
vec4f& destVector = getDest(dest);
|
vec4f& destVector = getDest(dest);
|
||||||
f24 res = f24::fromFloat32(1.0f) / srcVec1[0];
|
float input = srcVec1[0].toFloat32();
|
||||||
|
if (input == -0.0f) {
|
||||||
|
input = 0.0f;
|
||||||
|
}
|
||||||
|
const f24 res = f24::fromFloat32(1.0f / input);
|
||||||
|
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
|
@ -402,7 +414,11 @@ void PICAShader::rsq(u32 instruction) {
|
||||||
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
vec4f srcVec1 = getSourceSwizzled<1>(src1, operandDescriptor);
|
||||||
|
|
||||||
vec4f& destVector = getDest(dest);
|
vec4f& destVector = getDest(dest);
|
||||||
f24 res = f24::fromFloat32(1.0f / std::sqrt(srcVec1[0].toFloat32()));
|
float input = srcVec1[0].toFloat32();
|
||||||
|
if (input == -0.0f) {
|
||||||
|
input = 0.0f;
|
||||||
|
}
|
||||||
|
const f24 res = f24::fromFloat32(1.0f / std::sqrt(input));
|
||||||
|
|
||||||
u32 componentMask = operandDescriptor & 0xf;
|
u32 componentMask = operandDescriptor & 0xf;
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
|
|
|
@ -113,7 +113,7 @@ SHADER_TEST_CASE("RCP", "[shader][vertex]") {
|
||||||
{nihstro::OpCode::Id::END},
|
{nihstro::OpCode::Id::END},
|
||||||
});
|
});
|
||||||
|
|
||||||
// REQUIRE(shader->RunScalar({-0.0f}) == INFINITY); // Violates IEEE
|
REQUIRE(shader->runScalar({-0.0f}) == INFINITY);
|
||||||
REQUIRE(shader->runScalar({0.0f}) == INFINITY);
|
REQUIRE(shader->runScalar({0.0f}) == INFINITY);
|
||||||
REQUIRE(shader->runScalar({INFINITY}) == 0.0f);
|
REQUIRE(shader->runScalar({INFINITY}) == 0.0f);
|
||||||
REQUIRE(std::isnan(shader->runScalar({NAN})));
|
REQUIRE(std::isnan(shader->runScalar({NAN})));
|
||||||
|
|
Loading…
Add table
Reference in a new issue