[GL] Initial Implementation of Lighting LUTs

This commit is contained in:
Sky 2023-07-02 16:14:11 -07:00
parent 769e90a45f
commit 12dbaeaeb0
6 changed files with 122 additions and 18 deletions

View file

@ -70,6 +70,38 @@ class GPU {
Renderer renderer;
PicaVertex getImmediateModeVertex();
public:
// Identifiers of the lighting lookup tables.
// Each value is the LUT id selected through bits 8-12 of the LightingLUTIndex register and
// is also the row of that LUT inside lightingLUT. Ids 6 and 7 are not assigned to any LUT.
// NOTE(review): names presumably follow the PICA LUT naming (D0/D1 distribution, FR fresnel,
// RB/RG/RR reflection, SPn spotlight of light n, DAn distance attenuation of light n) - confirm.
enum : u32 {
    // Shared LUTs
    LIGHT_LUT_D0 = 0x00,
    LIGHT_LUT_D1 = 0x01,
    LIGHT_LUT_FR = 0x02,
    LIGHT_LUT_RB = 0x03,
    LIGHT_LUT_RG = 0x04,
    LIGHT_LUT_RR = 0x05,

    // One SP LUT per light, starting at id 8
    LIGHT_LUT_SP0 = 0x08,
    LIGHT_LUT_SP1 = 0x09,
    LIGHT_LUT_SP2 = 0x0A,
    LIGHT_LUT_SP3 = 0x0B,
    LIGHT_LUT_SP4 = 0x0C,
    LIGHT_LUT_SP5 = 0x0D,
    LIGHT_LUT_SP6 = 0x0E,
    LIGHT_LUT_SP7 = 0x0F,

    // One DA LUT per light, starting at id 16
    LIGHT_LUT_DA0 = 0x10,
    LIGHT_LUT_DA1 = 0x11,
    LIGHT_LUT_DA2 = 0x12,
    LIGHT_LUT_DA3 = 0x13,
    LIGHT_LUT_DA4 = 0x14,
    LIGHT_LUT_DA5 = 0x15,
    LIGHT_LUT_DA6 = 0x16,
    LIGHT_LUT_DA7 = 0x17,

    // Number of rows in lightingLUT (one past the highest valid LUT id)
    LIGHT_LUT_COUNT = 0x18
};
// CPU-side copy of all lighting LUTs.
// Each LUT has 256 entries and occupies its own row, so the array forms a 2D image of
// 256 x LIGHT_LUT_COUNT entries (entry address = lut_id * 256 + index within the LUT).
// Entries are kept in PICA native format; the renderer converts them when uploading.
std::array<uint32_t,LIGHT_LUT_COUNT*256> lightingLUT;
// Dirty flag used to avoid re-uploading lightingLUT on every draw call.
// Set to true when the CPU writes to lightingLUT.
// Set to false by the renderer once lightingLUT has been uploaded to the GPU.
bool lightingLUTDirty = false;
GPU(Memory& mem);
void initGraphicsContext() { renderer.initGraphicsContext(); }
void getGraphicsContext() { renderer.getGraphicsContext(); }

View file

@ -55,6 +55,17 @@ namespace PICA {
ColourBufferLoc = 0x11D,
FramebufferSize = 0x11E,
//LightingRegs
LightingLUTIndex = 0x01C5,
LightingLUTData0 = 0x01C8,
LightingLUTData1 = 0x01C9,
LightingLUTData2 = 0x01CA,
LightingLUTData3 = 0x01CB,
LightingLUTData4 = 0x01CC,
LightingLUTData5 = 0x01CD,
LightingLUTData6 = 0x01CE,
LightingLUTData7 = 0x01CF,
// Geometry pipeline registers
VertexAttribLoc = 0x200,
AttribFormatLow = 0x201,

View file

@ -67,6 +67,7 @@ class Renderer {
const std::array<u32, regNum>& regs;
OpenGL::Texture screenTexture;
GLuint lightLUTTextureArray;
OpenGL::Framebuffer screenFramebuffer;
OpenGL::Framebuffer getColourFBO();
@ -77,6 +78,7 @@ class Renderer {
void bindDepthBuffer();
void setupTextureEnvState();
void bindTexturesToSlots();
void updateLightingLUT();
public:
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}

View file

@ -21,6 +21,7 @@ void GPU::reset() {
shaderUnit.reset();
shaderJIT.reset();
std::memset(vram, 0, vramSize);
lightingLUT.fill(0);
totalAttribCount = 0;
fixedAttribMask = 0;

View file

@ -28,7 +28,19 @@ u32 GPU::readInternalReg(u32 index) {
Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
return 0;
}
using namespace PICA::InternalRegs;
if(index>=LightingLUTData0&&index<=LightingLUTData7){
uint32_t ind = regs[LightingLUTIndex];
uint32_t lut_id = (ind>>8)&(0x1f);
uint32_t lut_addr = ind&0xff;
uint32_t value = 0xffffffff;
if(lut_id<LIGHT_LUT_COUNT){
value = lightingLUT[lut_id*256+lut_addr];
}
lut_addr+=1;
regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff);
return value;
}
return regs[index];
}
@ -91,6 +103,25 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
break;
}
case LightingLUTData0:
case LightingLUTData1:
case LightingLUTData2:
case LightingLUTData3:
case LightingLUTData4:
case LightingLUTData5:
case LightingLUTData6:
case LightingLUTData7:{
uint32_t ind = regs[LightingLUTIndex];
uint32_t lut_id = (ind>>8)&(0x1f);
uint32_t lut_addr = ind&0xff;
if(lut_id<LIGHT_LUT_COUNT){
lightingLUT[lut_id*256+lut_addr]=newValue;
lightingLUTDirty = true;
}
lut_addr+=1;
regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff);
} break;
case VertexFloatUniformIndex:
shaderUnit.vs.setFloatUniformIndex(value);
break;

View file

@ -101,6 +101,7 @@ const char* fragmentShader = R"(
uniform sampler2D u_tex0;
uniform sampler2D u_tex1;
uniform sampler2D u_tex2;
uniform sampler1DArray u_tex_lighting_lut;
uniform uint u_picaRegs[0x200-0x47];
@ -218,9 +219,10 @@ const char* fragmentShader = R"(
#define RG_LUT 5u
#define RR_LUT 6u
float lutLookup(uint lut, float value){
//TODO: Implement this.
return value;
float lutLookup(uint lut, uint light, float value){
if(lut>=FR_LUT&&lut<=RR_LUT)lut-=1;
if(lut==SP_LUT)lut=8+light;
return texture(u_tex_lighting_lut,vec2(value,lut)).r;
}
vec3 regToColor(uint reg){
return vec3(
@ -306,17 +308,17 @@ const char* fragmentShader = R"(
vec3 half_vector = normalize(normalize(light_vector)+view);
for(int i=0;i<7u;++i){
if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+i,1)==0){
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,i*4,3);
for(int c=0;c<7u;++c){
if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+c,1)==0){
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,c*4,3);
float scale = float(1u<<scale_id);
if(scale_id>=6u) scale/=256.0;
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,i*4,3);
if(input_id==0u)d[i] = dot(normal,half_vector);
else if(input_id==1u)d[i] = dot(view,half_vector);
else if(input_id==2u)d[i] = dot(normal,view);
else if(input_id==3u)d[i] = dot(light_vector,normal);
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,c*4,3);
if(input_id==0u)d[c] = dot(normal,half_vector);
else if(input_id==1u)d[c] = dot(view,half_vector);
else if(input_id==2u)d[c] = dot(normal,view);
else if(input_id==3u)d[c] = dot(light_vector,normal);
else if(input_id==4u){
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id);
uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id);
@ -325,16 +327,17 @@ const char* fragmentShader = R"(
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11),
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11)
);
d[i] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP);
d[c] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP);
}else if(input_id==5u){
d[i] = 1.0;//TODO: cos <greek symbol> (aka CP);
d[c] = 1.0;//TODO: cos <greek symbol> (aka CP);
error_unimpl = true;
}else d[i] = 1.0;
}else d[c] = 1.0;
d[i] = lutLookup(i,d[i])*scale;
if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*i,1)!=0u)d[i]=abs(d[i]);
}else d[i]=1.0;
d[c] = lutLookup(c,light_id,d[c]*0.5+0.5)*scale;
if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*c,1)!=0u)d[c]=abs(d[c]);
}else d[c]=1.0;
}
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
if(lookup_config==0){
d[D1_LUT] = 1.0;
@ -421,6 +424,8 @@ const char* fragmentShader = R"(
if (tevUnimplementedSourceFlag) {
// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
}
//fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
@ -566,6 +571,7 @@ void Renderer::initGraphicsContext() {
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
@ -610,6 +616,8 @@ void Renderer::initGraphicsContext() {
// Create texture and framebuffer for the 3DS screen
const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
glGenTextures(1,&lightLUTTextureArray);
auto prevTexture = OpenGL::getTex2D();
screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
@ -739,6 +747,8 @@ void Renderer::bindTexturesToSlots() {
tex.bind();
}
glActiveTexture(GL_TEXTURE0+3);
glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray);
glActiveTexture(GL_TEXTURE0);
// Update the texture unit configuration uniform if it changed
@ -748,6 +758,22 @@ void Renderer::bindTexturesToSlots() {
glUniform1ui(texUnitConfigLoc, texUnitConfig);
}
}
void Renderer::updateLightingLUT(){
std::array<u16, sizeof(gpu.lightingLUT)/sizeof(gpu.lightingLUT[0])> u16_lightinglut;
for(int i=0;i<gpu.lightingLUT.size();++i){
uint64_t value = gpu.lightingLUT[i]&((1<<12)-1);
u16_lightinglut[i] = value*65535/4095;
}
glActiveTexture(GL_TEXTURE0+3);
glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray);
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, gpu.LIGHT_LUT_COUNT,0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glActiveTexture(GL_TEXTURE0+0);
gpu.lightingLUTDirty=false;
}
void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
@ -811,6 +837,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
bindTexturesToSlots();
//Upload Pica Registers
glUniform1uiv(picaRegLoc,0x200-0x47,&regs[0x47]);
if(gpu.lightingLUTDirty)updateLightingLUT();
// TODO: Actually use this
float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;