diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 2b000320..2640ccce 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -70,6 +70,38 @@ class GPU { Renderer renderer; PicaVertex getImmediateModeVertex(); public: + enum : u32 { + LIGHT_LUT_D0=0, + LIGHT_LUT_D1, + LIGHT_LUT_FR, + LIGHT_LUT_RB, + LIGHT_LUT_RG, + LIGHT_LUT_RR, + LIGHT_LUT_SP0=0x8, + LIGHT_LUT_SP1, + LIGHT_LUT_SP2, + LIGHT_LUT_SP3, + LIGHT_LUT_SP4, + LIGHT_LUT_SP5, + LIGHT_LUT_SP6, + LIGHT_LUT_SP7, + LIGHT_LUT_DA0=0x10, + LIGHT_LUT_DA1, + LIGHT_LUT_DA2, + LIGHT_LUT_DA3, + LIGHT_LUT_DA4, + LIGHT_LUT_DA5, + LIGHT_LUT_DA6, + LIGHT_LUT_DA7, + LIGHT_LUT_COUNT + }; + //256 entries per LUT with each LUT as its own row forming a 2D image 256xLIGHT_LUT_COUNT + //Encoded in PICA native format + std::array lightingLUT; + //Used to prevent uploading the lighting_lut on every draw call + //Set to true when the CPU writes to the lighting_lut + //Set to false by the renderer when the lighting_lut is uploaded to the GPU + bool lightingLUTDirty = false; GPU(Memory& mem); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index 6c868484..b4ff3498 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -55,6 +55,17 @@ namespace PICA { ColourBufferLoc = 0x11D, FramebufferSize = 0x11E, + //LightingRegs + LightingLUTIndex = 0x01C5, + LightingLUTData0 = 0x01C8, + LightingLUTData1 = 0x01C9, + LightingLUTData2 = 0x01CA, + LightingLUTData3 = 0x01CB, + LightingLUTData4 = 0x01CC, + LightingLUTData5 = 0x01CD, + LightingLUTData6 = 0x01CE, + LightingLUTData7 = 0x01CF, + // Geometry pipeline registers VertexAttribLoc = 0x200, AttribFormatLow = 0x201, diff --git a/include/renderer_gl/renderer_gl.hpp b/include/renderer_gl/renderer_gl.hpp index 2f3b2119..bf85904b 100644 --- a/include/renderer_gl/renderer_gl.hpp +++ b/include/renderer_gl/renderer_gl.hpp @@ -67,6 +67,7 @@ 
class Renderer { const std::array& regs; OpenGL::Texture screenTexture; + GLuint lightLUTTextureArray; OpenGL::Framebuffer screenFramebuffer; OpenGL::Framebuffer getColourFBO(); @@ -77,6 +78,7 @@ class Renderer { void bindDepthBuffer(); void setupTextureEnvState(); void bindTexturesToSlots(); + void updateLightingLUT(); public: Renderer(GPU& gpu, const std::array& internalRegs) : gpu(gpu), regs(internalRegs) {} diff --git a/src/core/PICA/gpu.cpp b/src/core/PICA/gpu.cpp index 99e5221f..2efc4195 100644 --- a/src/core/PICA/gpu.cpp +++ b/src/core/PICA/gpu.cpp @@ -21,6 +21,7 @@ void GPU::reset() { shaderUnit.reset(); shaderJIT.reset(); std::memset(vram, 0, vramSize); + lightingLUT.fill(0); totalAttribCount = 0; fixedAttribMask = 0; diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index 610cfe16..f04f44f4 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -28,7 +28,19 @@ u32 GPU::readInternalReg(u32 index) { Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index); return 0; } - + using namespace PICA::InternalRegs; + if(index>=LightingLUTData0&&index<=LightingLUTData7){ + uint32_t ind = regs[LightingLUTIndex]; + uint32_t lut_id = (ind>>8)&(0x1f); + uint32_t lut_addr = ind&0xff; + uint32_t value = 0xffffffff; + if(lut_id>8)&(0x1f); + uint32_t lut_addr = ind&0xff; + if(lut_id=FR_LUT&&lut<=RR_LUT)lut-=1; + if(lut==SP_LUT)lut=8+light; + return texture(u_tex_lighting_lut,vec2(value,lut)).r; } vec3 regToColor(uint reg){ return vec3( @@ -306,17 +308,17 @@ const char* fragmentShader = R"( vec3 half_vector = normalize(normalize(light_vector)+view); - for(int i=0;i<7u;++i){ - if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+i,1)==0){ - uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,i*4,3); + for(int c=0;c<7u;++c){ + if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+c,1)==0){ + uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,c*4,3); float scale = float(1u<=6u) scale/=256.0; - uint input_id = 
bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,i*4,3); - if(input_id==0u)d[i] = dot(normal,half_vector); - else if(input_id==1u)d[i] = dot(view,half_vector); - else if(input_id==2u)d[i] = dot(normal,view); - else if(input_id==3u)d[i] = dot(light_vector,normal); + uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,c*4,3); + if(input_id==0u)d[c] = dot(normal,half_vector); + else if(input_id==1u)d[c] = dot(view,half_vector); + else if(input_id==2u)d[c] = dot(normal,view); + else if(input_id==3u)d[c] = dot(light_vector,normal); else if(input_id==4u){ uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id); uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id); @@ -325,16 +327,17 @@ const char* fragmentShader = R"( decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11), decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11) ); - d[i] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP); + d[c] = dot(-light_vector,spot_light_vector);// -L . 
P (aka Spotlight aka SP); }else if(input_id==5u){ - d[i] = 1.0;//TODO: cos (aka CP); + d[c] = 1.0;//TODO: cos (aka CP); error_unimpl = true; - }else d[i] = 1.0; + }else d[c] = 1.0; - d[i] = lutLookup(i,d[i])*scale; - if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*i,1)!=0u)d[i]=abs(d[i]); - }else d[i]=1.0; + d[c] = lutLookup(c,light_id,d[c]*0.5+0.5)*scale; + if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*c,1)!=0u)d[c]=abs(d[c]); + }else d[c]=1.0; } + uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4); if(lookup_config==0){ d[D1_LUT] = 1.0; @@ -421,6 +424,8 @@ const char* fragmentShader = R"( if (tevUnimplementedSourceFlag) { // fragColour = vec4(1.0, 0.0, 1.0, 1.0); } + //fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr; + // Get original depth value by converting from [near, far] = [0, 1] to [-1, 1] // We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1] @@ -566,6 +571,7 @@ void Renderer::initGraphicsContext() { glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1); glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2); + glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3); OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex); OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment); @@ -610,6 +616,8 @@ void Renderer::initGraphicsContext() { // Create texture and framebuffer for the 3DS screen const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320 const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall + + glGenTextures(1,&lightLUTTextureArray); auto prevTexture = OpenGL::getTex2D(); screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8); @@ -739,6 +747,8 @@ void Renderer::bindTexturesToSlots() { tex.bind(); } + glActiveTexture(GL_TEXTURE0+3); + 
glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); // Update the texture unit configuration uniform if it changed @@ -748,6 +758,22 @@ void Renderer::bindTexturesToSlots() { glUniform1ui(texUnitConfigLoc, texUnitConfig); } } +void Renderer::updateLightingLUT(){ + std::array u16_lightinglut; + for(int i=0;i vertices) { // The fourth type is meant to be "Geometry primitive". TODO: Find out what that is @@ -811,6 +837,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span bindTexturesToSlots(); //Upload Pica Registers glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]); + if(gpu.lightingLUTDirty)updateLightingLUT(); // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;