diff --git a/include/PICA/gpu.hpp b/include/PICA/gpu.hpp index 2640ccce..ced2c557 100644 --- a/include/PICA/gpu.hpp +++ b/include/PICA/gpu.hpp @@ -69,39 +69,18 @@ class GPU { Renderer renderer; PicaVertex getImmediateModeVertex(); -public: - enum : u32 { - LIGHT_LUT_D0=0, - LIGHT_LUT_D1, - LIGHT_LUT_FR, - LIGHT_LUT_RB, - LIGHT_LUT_RG, - LIGHT_LUT_RR, - LIGHT_LUT_SP0=0x8, - LIGHT_LUT_SP1, - LIGHT_LUT_SP2, - LIGHT_LUT_SP3, - LIGHT_LUT_SP4, - LIGHT_LUT_SP5, - LIGHT_LUT_SP6, - LIGHT_LUT_SP7, - LIGHT_LUT_DA0=0x10, - LIGHT_LUT_DA1, - LIGHT_LUT_DA2, - LIGHT_LUT_DA3, - LIGHT_LUT_DA4, - LIGHT_LUT_DA5, - LIGHT_LUT_DA6, - LIGHT_LUT_DA7, - LIGHT_LUT_COUNT - }; - //256 entries per LUT with each LUT as its own row forming a 2D image 256xLIGHT_LUT_COUNT - //Encoded in PICA native format - std::array lightingLUT; - //Used to prevent uploading the lighting_lut on every draw call - //Set to true when the CPU writes to the lighting_lut - //Set to false by the renderer when the lighting_lut is uploaded ot the GPU + + public: + // 256 entries per LUT with each LUT as its own row forming a 2D image 256 * LUT_COUNT + // Encoded in PICA native format + static constexpr size_t LightingLutSize = PICA::Lights::LUT_Count * 256; + std::array lightingLUT; + + // Used to prevent uploading the lighting_lut on every draw call + // Set to true when the CPU writes to the lighting_lut + // Set to false by the renderer when the lighting_lut is uploaded to the GPU bool lightingLUTDirty = false; + GPU(Memory& mem); void initGraphicsContext() { renderer.initGraphicsContext(); } void getGraphicsContext() { renderer.getGraphicsContext(); } diff --git a/include/PICA/regs.hpp b/include/PICA/regs.hpp index b4ff3498..0337cc1f 100644 --- a/include/PICA/regs.hpp +++ b/include/PICA/regs.hpp @@ -167,6 +167,34 @@ namespace PICA { }; } + namespace Lights { + enum : u32 { + LUT_D0 = 0, + LUT_D1, + LUT_FR, + LUT_RB, + LUT_RG, + LUT_RR, + LUT_SP0 = 0x8, + LUT_SP1, + LUT_SP2, + LUT_SP3, + LUT_SP4, + LUT_SP5, + LUT_SP6, 
+ LUT_SP7, + LUT_DA0 = 0x10, + LUT_DA1, + LUT_DA2, + LUT_DA3, + LUT_DA4, + LUT_DA5, + LUT_DA6, + LUT_DA7, + LUT_Count + }; + } + enum class TextureFmt : u32 { RGBA8 = 0x0, RGB8 = 0x1, diff --git a/src/core/PICA/regs.cpp b/src/core/PICA/regs.cpp index f04f44f4..7cc097de 100644 --- a/src/core/PICA/regs.cpp +++ b/src/core/PICA/regs.cpp @@ -24,23 +24,29 @@ void GPU::writeReg(u32 address, u32 value) { } u32 GPU::readInternalReg(u32 index) { + using namespace PICA::InternalRegs; + if (index > regNum) { Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index); return 0; } - using namespace PICA::InternalRegs; - if(index>=LightingLUTData0&&index<=LightingLUTData7){ - uint32_t ind = regs[LightingLUTIndex]; - uint32_t lut_id = (ind>>8)&(0x1f); - uint32_t lut_addr = ind&0xff; - uint32_t value = 0xffffffff; - if(lut_id= LightingLUTData0 && index <= LightingLUTData7) { + const uint32_t index = regs[LightingLUTIndex]; // Get full LUT index register + const uint32_t lutID = getBits<8, 5>(index); // Get which LUT we're actually writing to + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to + uint32_t value = 0xffffffff; // Return value + + if (lutID < PICA::Lights::LUT_Count) { + value = lightingLUT[lutID * 256 + lutIndex]; } - lut_addr+=1; - regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff); + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); return value; } + return regs[index]; } @@ -111,16 +117,21 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) { case LightingLUTData5: case LightingLUTData6: case LightingLUTData7:{ - uint32_t ind = regs[LightingLUTIndex]; - uint32_t lut_id = (ind>>8)&(0x1f); - uint32_t lut_addr = ind&0xff; - if(lut_id(index); // Get which LUT we're actually writing to + uint32_t lutIndex = getBits<0, 8>(index); // And get the index inside the LUT we're writing to + + if (lutID < 
PICA::Lights::LUT_Count) { + lightingLUT[lutID * 256 + lutIndex] = newValue; lightingLUTDirty = true; } - lut_addr+=1; - regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff); - } break; + + // Increment the bottom 8 bits of the lighting LUT index register + lutIndex += 1; + regs[LightingLUTIndex] = (index & ~0xff) | (lutIndex & 0xff); + + break; + } case VertexFloatUniformIndex: shaderUnit.vs.setFloatUniformIndex(value); diff --git a/src/core/renderer_gl/renderer_gl.cpp b/src/core/renderer_gl/renderer_gl.cpp index 0e7aa4b3..b5350ddc 100644 --- a/src/core/renderer_gl/renderer_gl.cpp +++ b/src/core/renderer_gl/renderer_gl.cpp @@ -5,8 +5,7 @@ using namespace Floats; using namespace Helpers; - -// This is all hacked up to display our first triangle +using namespace PICA; const char* vertexShader = R"( #version 410 core @@ -223,6 +222,7 @@ const char* fragmentShader = R"( return result; } + #define D0_LUT 0u #define D1_LUT 1u #define SP_LUT 2u @@ -281,7 +281,7 @@ const char* fragmentShader = R"( primary_color = vec4(vec3(0.0),1.0); secondary_color = vec4(vec3(0.0),1.0); - primary_color.rgb+= regToColor(GPUREG_LIGHTING_AMBIENT); + primary_color.rgb += regToColor(GPUREG_LIGHTING_AMBIENT); uint GPUREG_LIGHTING_LUTINPUT_ABS = readPicaReg(0x01D0); uint GPUREG_LIGHTING_LUTINPUT_SELECT = readPicaReg(0x01D1); @@ -753,8 +753,8 @@ void Renderer::bindTexturesToSlots() { tex.bind(); } - glActiveTexture(GL_TEXTURE0+3); - glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); + glActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); glActiveTexture(GL_TEXTURE0); // Update the texture unit configuration uniform if it changed @@ -765,20 +765,22 @@ void Renderer::bindTexturesToSlots() { } } void Renderer::updateLightingLUT(){ - std::array u16_lightinglut; - for(int i=0;i u16_lightinglut; + + for(int i = 0; i < gpu.lightingLUT.size(); i++){ + uint64_t value = gpu.lightingLUT[i] & ((1 << 12) - 1); + u16_lightinglut[i] = value * 65535 / 4095; } - 
glActiveTexture(GL_TEXTURE0+3); - glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray); - glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, gpu.LIGHT_LUT_COUNT,0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); + + glActiveTexture(GL_TEXTURE0 + 3); + glBindTexture(GL_TEXTURE_1D_ARRAY, lightLUTTextureArray); + glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, Lights::LUT_Count, 0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data()); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glActiveTexture(GL_TEXTURE0+0); - gpu.lightingLUTDirty=false; + glActiveTexture(GL_TEXTURE0); + gpu.lightingLUTDirty = false; } void Renderer::drawVertices(PICA::PrimType primType, std::span vertices) { @@ -841,9 +843,14 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span setupTextureEnvState(); bindTexturesToSlots(); - //Upload Pica Registers - glUniform1uiv(picaRegLoc,0x200-0x47,&regs[0x47]); - if(gpu.lightingLUTDirty)updateLightingLUT(); + + // Upload PICA Registers as a single uniform. The shader needs access to the rasterizer registers (for depth, starting from index 0x47) + // The texturing and the fragment lighting registers. Therefore we upload them all in one go to avoid multiple slow uniform updates + glUniform1uiv(picaRegLoc, 0x200 - 0x47, &regs[0x47]); + + if (gpu.lightingLUTDirty) { + updateLightingLUT(); + } // TODO: Actually use this float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;