mirror of
https://github.com/wheremyfoodat/Panda3DS.git
synced 2025-04-18 03:31:31 +12:00
[GL] Initial Implementation of Lighting LUTs
This commit is contained in:
parent
769e90a45f
commit
12dbaeaeb0
6 changed files with 122 additions and 18 deletions
|
@ -70,6 +70,38 @@ class GPU {
|
||||||
Renderer renderer;
|
Renderer renderer;
|
||||||
PicaVertex getImmediateModeVertex();
|
PicaVertex getImmediateModeVertex();
|
||||||
public:
|
public:
|
||||||
|
// Row index of each lighting lookup table inside lightingLUT.
// The values are compared against the 5-bit table id taken from bits 8-12 of the
// LightingLUTIndex register, so they must stay exactly as listed here.
// NOTE(review): the D/FR/RB/RG/RR/DA names presumably follow the PICA LUT naming
// (distribution, Fresnel, reflection B/G/R, distance attenuation) - confirm against hardware docs.
enum : u32 {
	LIGHT_LUT_D0 = 0x00,
	LIGHT_LUT_D1 = 0x01,
	LIGHT_LUT_FR = 0x02,
	LIGHT_LUT_RB = 0x03,
	LIGHT_LUT_RG = 0x04,
	LIGHT_LUT_RR = 0x05,
	// 0x06 and 0x07 are intentionally left without a name

	// One SP table per light; the fragment shader selects row 8+light for these
	LIGHT_LUT_SP0 = 0x08,
	LIGHT_LUT_SP1 = 0x09,
	LIGHT_LUT_SP2 = 0x0A,
	LIGHT_LUT_SP3 = 0x0B,
	LIGHT_LUT_SP4 = 0x0C,
	LIGHT_LUT_SP5 = 0x0D,
	LIGHT_LUT_SP6 = 0x0E,
	LIGHT_LUT_SP7 = 0x0F,

	// One DA table per light
	LIGHT_LUT_DA0 = 0x10,
	LIGHT_LUT_DA1 = 0x11,
	LIGHT_LUT_DA2 = 0x12,
	LIGHT_LUT_DA3 = 0x13,
	LIGHT_LUT_DA4 = 0x14,
	LIGHT_LUT_DA5 = 0x15,
	LIGHT_LUT_DA6 = 0x16,
	LIGHT_LUT_DA7 = 0x17,

	// Number of rows in lightingLUT (one past the last valid table id)
	LIGHT_LUT_COUNT = 0x18
};
|
||||||
|
//256 entries per LUT with each LUT as its own row forming a 2D image 256xLIGHT_LUT_COUNT
|
||||||
|
//Encoded in PICA native format
|
||||||
|
std::array<uint32_t,LIGHT_LUT_COUNT*256> lightingLUT;
|
||||||
|
//Used to prevent uploading the lighting_lut on every draw call
|
||||||
|
//Set to true when the CPU writes to the lighting_lut
|
||||||
|
//Set to false by the renderer when the lighting_lut is uploaded to the GPU
|
||||||
|
bool lightingLUTDirty = false;
|
||||||
GPU(Memory& mem);
|
GPU(Memory& mem);
|
||||||
void initGraphicsContext() { renderer.initGraphicsContext(); }
|
void initGraphicsContext() { renderer.initGraphicsContext(); }
|
||||||
void getGraphicsContext() { renderer.getGraphicsContext(); }
|
void getGraphicsContext() { renderer.getGraphicsContext(); }
|
||||||
|
|
|
@ -55,6 +55,17 @@ namespace PICA {
|
||||||
ColourBufferLoc = 0x11D,
|
ColourBufferLoc = 0x11D,
|
||||||
FramebufferSize = 0x11E,
|
FramebufferSize = 0x11E,
|
||||||
|
|
||||||
|
//LightingRegs
|
||||||
|
LightingLUTIndex = 0x01C5,
|
||||||
|
LightingLUTData0 = 0x01C8,
|
||||||
|
LightingLUTData1 = 0x01C9,
|
||||||
|
LightingLUTData2 = 0x01CA,
|
||||||
|
LightingLUTData3 = 0x01CB,
|
||||||
|
LightingLUTData4 = 0x01CC,
|
||||||
|
LightingLUTData5 = 0x01CD,
|
||||||
|
LightingLUTData6 = 0x01CE,
|
||||||
|
LightingLUTData7 = 0x01CF,
|
||||||
|
|
||||||
// Geometry pipeline registers
|
// Geometry pipeline registers
|
||||||
VertexAttribLoc = 0x200,
|
VertexAttribLoc = 0x200,
|
||||||
AttribFormatLow = 0x201,
|
AttribFormatLow = 0x201,
|
||||||
|
|
|
@ -67,6 +67,7 @@ class Renderer {
|
||||||
const std::array<u32, regNum>& regs;
|
const std::array<u32, regNum>& regs;
|
||||||
|
|
||||||
OpenGL::Texture screenTexture;
|
OpenGL::Texture screenTexture;
|
||||||
|
GLuint lightLUTTextureArray;
|
||||||
OpenGL::Framebuffer screenFramebuffer;
|
OpenGL::Framebuffer screenFramebuffer;
|
||||||
|
|
||||||
OpenGL::Framebuffer getColourFBO();
|
OpenGL::Framebuffer getColourFBO();
|
||||||
|
@ -77,6 +78,7 @@ class Renderer {
|
||||||
void bindDepthBuffer();
|
void bindDepthBuffer();
|
||||||
void setupTextureEnvState();
|
void setupTextureEnvState();
|
||||||
void bindTexturesToSlots();
|
void bindTexturesToSlots();
|
||||||
|
void updateLightingLUT();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
|
Renderer(GPU& gpu, const std::array<u32, regNum>& internalRegs) : gpu(gpu), regs(internalRegs) {}
|
||||||
|
|
|
@ -21,6 +21,7 @@ void GPU::reset() {
|
||||||
shaderUnit.reset();
|
shaderUnit.reset();
|
||||||
shaderJIT.reset();
|
shaderJIT.reset();
|
||||||
std::memset(vram, 0, vramSize);
|
std::memset(vram, 0, vramSize);
|
||||||
|
lightingLUT.fill(0);
|
||||||
|
|
||||||
totalAttribCount = 0;
|
totalAttribCount = 0;
|
||||||
fixedAttribMask = 0;
|
fixedAttribMask = 0;
|
||||||
|
|
|
@ -28,7 +28,19 @@ u32 GPU::readInternalReg(u32 index) {
|
||||||
Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
|
Helpers::panic("Tried to read invalid GPU register. Index: %X\n", index);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
using namespace PICA::InternalRegs;
|
||||||
|
if(index>=LightingLUTData0&&index<=LightingLUTData7){
|
||||||
|
uint32_t ind = regs[LightingLUTIndex];
|
||||||
|
uint32_t lut_id = (ind>>8)&(0x1f);
|
||||||
|
uint32_t lut_addr = ind&0xff;
|
||||||
|
uint32_t value = 0xffffffff;
|
||||||
|
if(lut_id<LIGHT_LUT_COUNT){
|
||||||
|
value = lightingLUT[lut_id*256+lut_addr];
|
||||||
|
}
|
||||||
|
lut_addr+=1;
|
||||||
|
regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
return regs[index];
|
return regs[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,6 +103,25 @@ void GPU::writeInternalReg(u32 index, u32 value, u32 mask) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case LightingLUTData0:
|
||||||
|
case LightingLUTData1:
|
||||||
|
case LightingLUTData2:
|
||||||
|
case LightingLUTData3:
|
||||||
|
case LightingLUTData4:
|
||||||
|
case LightingLUTData5:
|
||||||
|
case LightingLUTData6:
|
||||||
|
case LightingLUTData7:{
|
||||||
|
uint32_t ind = regs[LightingLUTIndex];
|
||||||
|
uint32_t lut_id = (ind>>8)&(0x1f);
|
||||||
|
uint32_t lut_addr = ind&0xff;
|
||||||
|
if(lut_id<LIGHT_LUT_COUNT){
|
||||||
|
lightingLUT[lut_id*256+lut_addr]=newValue;
|
||||||
|
lightingLUTDirty = true;
|
||||||
|
}
|
||||||
|
lut_addr+=1;
|
||||||
|
regs[LightingLUTIndex]=(ind&~0xff)|(lut_addr&0xff);
|
||||||
|
} break;
|
||||||
|
|
||||||
case VertexFloatUniformIndex:
|
case VertexFloatUniformIndex:
|
||||||
shaderUnit.vs.setFloatUniformIndex(value);
|
shaderUnit.vs.setFloatUniformIndex(value);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -101,6 +101,7 @@ const char* fragmentShader = R"(
|
||||||
uniform sampler2D u_tex0;
|
uniform sampler2D u_tex0;
|
||||||
uniform sampler2D u_tex1;
|
uniform sampler2D u_tex1;
|
||||||
uniform sampler2D u_tex2;
|
uniform sampler2D u_tex2;
|
||||||
|
uniform sampler1DArray u_tex_lighting_lut;
|
||||||
|
|
||||||
uniform uint u_picaRegs[0x200-0x47];
|
uniform uint u_picaRegs[0x200-0x47];
|
||||||
|
|
||||||
|
@ -218,9 +219,10 @@ const char* fragmentShader = R"(
|
||||||
#define RG_LUT 5u
|
#define RG_LUT 5u
|
||||||
#define RR_LUT 6u
|
#define RR_LUT 6u
|
||||||
|
|
||||||
// Samples one lighting LUT from u_tex_lighting_lut.
//   lut   - shader-side LUT slot (D0_LUT .. RR_LUT)
//   light - index of the light being evaluated; only used for the spotlight LUT
//   value - lookup coordinate, already mapped to [0, 1] by the caller
// Returns the red channel of the sampled texel.
float lutLookup(uint lut, uint light, float value){
	// Translate the shader-side slot into a row of the LUT texture.
	// The spotlight test must run on the *unmodified* slot: if the FR..RR range is
	// decremented first, FR_LUT lands on the slot just below it and would then be
	// mistaken for SP_LUT, redirecting Fresnel lookups to the spotlight tables.
	// NOTE(review): SP_LUT is defined outside this view; this assumes it sits
	// directly below FR_LUT (2u) - confirm.
	if(lut==SP_LUT){
		// Spotlight tables are stored per light, starting at row 8
		lut=8u+light;
	}else if(lut>=FR_LUT&&lut<=RR_LUT){
		// The texture has no shared SP row, so FR..RR sit one row lower
		lut-=1u;
	}
	return texture(u_tex_lighting_lut,vec2(value,lut)).r;
}
|
||||||
vec3 regToColor(uint reg){
|
vec3 regToColor(uint reg){
|
||||||
return vec3(
|
return vec3(
|
||||||
|
@ -306,17 +308,17 @@ const char* fragmentShader = R"(
|
||||||
|
|
||||||
vec3 half_vector = normalize(normalize(light_vector)+view);
|
vec3 half_vector = normalize(normalize(light_vector)+view);
|
||||||
|
|
||||||
for(int i=0;i<7u;++i){
|
for(int c=0;c<7u;++c){
|
||||||
if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+i,1)==0){
|
if(bitfieldExtract(GPUREG_LIGHTING_CONFIG1,16+c,1)==0){
|
||||||
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,i*4,3);
|
uint scale_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SCALE,c*4,3);
|
||||||
float scale = float(1u<<scale_id);
|
float scale = float(1u<<scale_id);
|
||||||
if(scale_id>=6u) scale/=256.0;
|
if(scale_id>=6u) scale/=256.0;
|
||||||
|
|
||||||
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,i*4,3);
|
uint input_id = bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_SELECT,c*4,3);
|
||||||
if(input_id==0u)d[i] = dot(normal,half_vector);
|
if(input_id==0u)d[c] = dot(normal,half_vector);
|
||||||
else if(input_id==1u)d[i] = dot(view,half_vector);
|
else if(input_id==1u)d[c] = dot(view,half_vector);
|
||||||
else if(input_id==2u)d[i] = dot(normal,view);
|
else if(input_id==2u)d[c] = dot(normal,view);
|
||||||
else if(input_id==3u)d[i] = dot(light_vector,normal);
|
else if(input_id==3u)d[c] = dot(light_vector,normal);
|
||||||
else if(input_id==4u){
|
else if(input_id==4u){
|
||||||
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id);
|
uint GPUREG_LIGHTi_SPOTDIR_LOW = readPicaReg(0x0146+0x10*light_id);
|
||||||
uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id);
|
uint GPUREG_LIGHTi_SPOTDIR_HIGH= readPicaReg(0x0147+0x10*light_id);
|
||||||
|
@ -325,16 +327,17 @@ const char* fragmentShader = R"(
|
||||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11),
|
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_LOW,16,16),1,11),
|
||||||
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11)
|
decodeFP(bitfieldExtract(GPUREG_LIGHTi_SPOTDIR_HIGH,0,16),1,11)
|
||||||
);
|
);
|
||||||
d[i] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP);
|
d[c] = dot(-light_vector,spot_light_vector);// -L . P (aka Spotlight aka SP);
|
||||||
}else if(input_id==5u){
|
}else if(input_id==5u){
|
||||||
d[i] = 1.0;//TODO: cos <greek symbol> (aka CP);
|
d[c] = 1.0;//TODO: cos <greek symbol> (aka CP);
|
||||||
error_unimpl = true;
|
error_unimpl = true;
|
||||||
}else d[i] = 1.0;
|
}else d[c] = 1.0;
|
||||||
|
|
||||||
d[i] = lutLookup(i,d[i])*scale;
|
d[c] = lutLookup(c,light_id,d[c]*0.5+0.5)*scale;
|
||||||
if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*i,1)!=0u)d[i]=abs(d[i]);
|
if(bitfieldExtract(GPUREG_LIGHTING_LUTINPUT_ABS,2*c,1)!=0u)d[c]=abs(d[c]);
|
||||||
}else d[i]=1.0;
|
}else d[c]=1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
|
uint lookup_config = bitfieldExtract(GPUREG_LIGHTi_CONFIG,4,4);
|
||||||
if(lookup_config==0){
|
if(lookup_config==0){
|
||||||
d[D1_LUT] = 1.0;
|
d[D1_LUT] = 1.0;
|
||||||
|
@ -421,6 +424,8 @@ const char* fragmentShader = R"(
|
||||||
if (tevUnimplementedSourceFlag) {
|
if (tevUnimplementedSourceFlag) {
|
||||||
// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
|
// fragColour = vec4(1.0, 0.0, 1.0, 1.0);
|
||||||
}
|
}
|
||||||
|
//fragColour.rg = texture(u_tex_lighting_lut,vec2(gl_FragCoord.x/200.,float(int(gl_FragCoord.y/2)%24))).rr;
|
||||||
|
|
||||||
|
|
||||||
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
|
// Get original depth value by converting from [near, far] = [0, 1] to [-1, 1]
|
||||||
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
|
// We do this by converting to [0, 2] first and subtracting 1 to go to [-1, 1]
|
||||||
|
@ -566,6 +571,7 @@ void Renderer::initGraphicsContext() {
|
||||||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
|
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex0"), 0);
|
||||||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
|
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex1"), 1);
|
||||||
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
|
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex2"), 2);
|
||||||
|
glUniform1i(OpenGL::uniformLocation(triangleProgram, "u_tex_lighting_lut"), 3);
|
||||||
|
|
||||||
OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
|
OpenGL::Shader vertDisplay(displayVertexShader, OpenGL::Vertex);
|
||||||
OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
|
OpenGL::Shader fragDisplay(displayFragmentShader, OpenGL::Fragment);
|
||||||
|
@ -610,6 +616,8 @@ void Renderer::initGraphicsContext() {
|
||||||
// Create texture and framebuffer for the 3DS screen
|
// Create texture and framebuffer for the 3DS screen
|
||||||
const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
|
const u32 screenTextureWidth = 2 * 400; // Top screen is 400 pixels wide, bottom is 320
|
||||||
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
|
const u32 screenTextureHeight = 2 * 240; // Both screens are 240 pixels tall
|
||||||
|
|
||||||
|
glGenTextures(1,&lightLUTTextureArray);
|
||||||
|
|
||||||
auto prevTexture = OpenGL::getTex2D();
|
auto prevTexture = OpenGL::getTex2D();
|
||||||
screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
|
screenTexture.create(screenTextureWidth, screenTextureHeight, GL_RGBA8);
|
||||||
|
@ -739,6 +747,8 @@ void Renderer::bindTexturesToSlots() {
|
||||||
tex.bind();
|
tex.bind();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
glActiveTexture(GL_TEXTURE0+3);
|
||||||
|
glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray);
|
||||||
glActiveTexture(GL_TEXTURE0);
|
glActiveTexture(GL_TEXTURE0);
|
||||||
|
|
||||||
// Update the texture unit configuration uniform if it changed
|
// Update the texture unit configuration uniform if it changed
|
||||||
|
@ -748,6 +758,22 @@ void Renderer::bindTexturesToSlots() {
|
||||||
glUniform1ui(texUnitConfigLoc, texUnitConfig);
|
glUniform1ui(texUnitConfigLoc, texUnitConfig);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void Renderer::updateLightingLUT(){
|
||||||
|
std::array<u16, sizeof(gpu.lightingLUT)/sizeof(gpu.lightingLUT[0])> u16_lightinglut;
|
||||||
|
for(int i=0;i<gpu.lightingLUT.size();++i){
|
||||||
|
uint64_t value = gpu.lightingLUT[i]&((1<<12)-1);
|
||||||
|
u16_lightinglut[i] = value*65535/4095;
|
||||||
|
}
|
||||||
|
glActiveTexture(GL_TEXTURE0+3);
|
||||||
|
glBindTexture(GL_TEXTURE_1D_ARRAY,lightLUTTextureArray);
|
||||||
|
glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_R16, 256, gpu.LIGHT_LUT_COUNT,0, GL_RED, GL_UNSIGNED_SHORT, u16_lightinglut.data());
|
||||||
|
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||||
|
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||||
|
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||||
|
glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||||
|
glActiveTexture(GL_TEXTURE0+0);
|
||||||
|
gpu.lightingLUTDirty=false;
|
||||||
|
}
|
||||||
|
|
||||||
void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
|
void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex> vertices) {
|
||||||
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
|
// The fourth type is meant to be "Geometry primitive". TODO: Find out what that is
|
||||||
|
@ -811,6 +837,7 @@ void Renderer::drawVertices(PICA::PrimType primType, std::span<const PicaVertex>
|
||||||
bindTexturesToSlots();
|
bindTexturesToSlots();
|
||||||
//Upload Pica Registers
|
//Upload Pica Registers
|
||||||
glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]);
|
glUniform1uiv(picaRegLoc,0x200-0x47,®s[0x47]);
|
||||||
|
if(gpu.lightingLUTDirty)updateLightingLUT();
|
||||||
|
|
||||||
// TODO: Actually use this
|
// TODO: Actually use this
|
||||||
float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;
|
float viewportWidth = f24::fromRaw(regs[PICA::InternalRegs::ViewportWidth] & 0xffffff).toFloat32() * 2.0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue