From 195f3388e9a5f88e18ee2779c71069c60668f544 Mon Sep 17 00:00:00 2001 From: wheremyfoodat <44909372+wheremyfoodat@users.noreply.github.com> Date: Fri, 2 Aug 2024 22:53:51 +0300 Subject: [PATCH] PICA: Add LITP test + interpreter implementation --- include/PICA/shader.hpp | 3 +- src/core/PICA/shader_interpreter.cpp | 32 ++++ tests/PICA_LITP/Makefile | 255 ++++++++++++++++++++++++++ tests/PICA_LITP/source/main.c | 128 +++++++++++++ tests/PICA_LITP/source/vshader.v.pica | 73 ++++++++ 5 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 tests/PICA_LITP/Makefile create mode 100644 tests/PICA_LITP/source/main.c create mode 100644 tests/PICA_LITP/source/vshader.v.pica diff --git a/include/PICA/shader.hpp b/include/PICA/shader.hpp index 68b16de8..e5f57c72 100644 --- a/include/PICA/shader.hpp +++ b/include/PICA/shader.hpp @@ -23,7 +23,7 @@ namespace ShaderOpcodes { DST = 0x04, EX2 = 0x05, LG2 = 0x06, - LIT = 0x07, + LITP = 0x07, MUL = 0x08, SGE = 0x09, SLT = 0x0A, @@ -161,6 +161,7 @@ class PICAShader { void jmpc(u32 instruction); void jmpu(u32 instruction); void lg2(u32 instruction); + void litp(u32 instruction); void loop(u32 instruction); void mad(u32 instruction); void madi(u32 instruction); diff --git a/src/core/PICA/shader_interpreter.cpp b/src/core/PICA/shader_interpreter.cpp index 003ef97a..a85c7464 100644 --- a/src/core/PICA/shader_interpreter.cpp +++ b/src/core/PICA/shader_interpreter.cpp @@ -74,6 +74,9 @@ void PICAShader::run() { break; } + // Undocumented, implementation based on 3DBrew and hw testing (see tests/PICA_LITP) + case ShaderOpcodes::LITP: [[unlikely]] litp(instruction); break; + default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode); } @@ -753,4 +756,33 @@ void PICAShader::jmpu(u32 instruction) { if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want pc = dest; +} + +void PICAShader::litp(u32 instruction) { + const u32 operandDescriptor = operandDescriptors[instruction & 0x7f]; + u32 src = getBits<12, 7>(instruction); + const u32 idx = getBits<19, 2>(instruction); + const u32 dest = getBits<21, 5>(instruction); + + src = getIndexedSource(src, idx); + vec4f srcVec = getSourceSwizzled<1>(src, operandDescriptor); + vec4f& destVector = getDest(dest); + + // Compare registers are set based on whether src.x and src.w are >= 0.0 + cmpRegister[0] = (srcVec[0].toFloat32() >= 0.0f); + cmpRegister[1] = (srcVec[3].toFloat32() >= 0.0f); + + vec4f result; + // TODO: Does max here have the same non-IEEE NaN behavior as the max instruction? + result[0] = f24::fromFloat32(std::max(srcVec[0].toFloat32(), 0.0f)); + result[1] = f24::fromFloat32(std::clamp(srcVec[1].toFloat32(), -127.9961f, 127.9961f)); + result[2] = f24::zero(); + result[3] = f24::fromFloat32(std::max(srcVec[3].toFloat32(), 0.0f)); + + u32 componentMask = operandDescriptor & 0xf; + for (int i = 0; i < 4; i++) { + if (componentMask & (1 << i)) { + destVector[3 - i] = result[3 - i]; + } + } } \ No newline at end of file diff --git a/tests/PICA_LITP/Makefile b/tests/PICA_LITP/Makefile new file mode 100644 index 00000000..46a94048 --- /dev/null +++ b/tests/PICA_LITP/Makefile @@ -0,0 +1,255 @@ +#--------------------------------------------------------------------------------- +.SUFFIXES: +#--------------------------------------------------------------------------------- + +ifeq ($(strip $(DEVKITARM)),) +$(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") +endif + +TOPDIR ?= $(CURDIR) +include $(DEVKITARM)/3ds_rules + +#--------------------------------------------------------------------------------- +# TARGET is the name of the output +# BUILD is the directory where object files & intermediate files will be placed +# SOURCES is a list of directories containing source code +# DATA is a list of directories containing data files +# INCLUDES is a list of directories containing header files +# GRAPHICS is a list of directories containing graphics files +# GFXBUILD is the directory where converted graphics files will be placed +# If set to $(BUILD), it will statically link in the converted +# files as if they were data files. +# +# NO_SMDH: if set to anything, no SMDH file is generated. +# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional) +# APP_TITLE is the name of the app stored in the SMDH file (Optional) +# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) +# APP_AUTHOR is the author of the app stored in the SMDH file (Optional) +# ICON is the filename of the icon (.png), relative to the project folder. +# If not set, it attempts to use one of the following (in this order): +# - .png +# - icon.png +# - /default_icon.png +#--------------------------------------------------------------------------------- +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source +DATA := data +INCLUDES := include +GRAPHICS := gfx +GFXBUILD := $(BUILD) +#ROMFS := romfs +#GFXBUILD := $(ROMFS)/gfx + +#--------------------------------------------------------------------------------- +# options for code generation +#--------------------------------------------------------------------------------- +ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft + +CFLAGS := -g -Wall -O2 -mword-relocations \ + -ffunction-sections \ + $(ARCH) + +CFLAGS += $(INCLUDE) -D__3DS__ + +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11 + +ASFLAGS := -g $(ARCH) +LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) + +LIBS := -lcitro3d -lctru -lm + +#--------------------------------------------------------------------------------- +# list of directories containing libraries, this must be the top level containing +# include and lib +#--------------------------------------------------------------------------------- +LIBDIRS := $(CTRULIB) + + +#--------------------------------------------------------------------------------- +# no real need to edit anything past this point unless you need to add additional +# rules for different file extensions +#--------------------------------------------------------------------------------- +ifneq ($(BUILD),$(notdir $(CURDIR))) +#--------------------------------------------------------------------------------- + +export OUTPUT := $(CURDIR)/$(TARGET) +export TOPDIR := $(CURDIR) + +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ + $(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \ + $(foreach dir,$(DATA),$(CURDIR)/$(dir)) + +export DEPSDIR := $(CURDIR)/$(BUILD) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) +SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) +PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica))) +SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist))) +GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s))) +BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) + +#--------------------------------------------------------------------------------- +# use CXX for linking C++ projects, CC for standard C +#--------------------------------------------------------------------------------- +ifeq ($(strip $(CPPFILES)),) +#--------------------------------------------------------------------------------- + export LD := $(CC) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- + export LD := $(CXX) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +#--------------------------------------------------------------------------------- +ifeq ($(GFXBUILD),$(BUILD)) +#--------------------------------------------------------------------------------- +export T3XFILES := $(GFXFILES:.t3s=.t3x) +#--------------------------------------------------------------------------------- +else +#--------------------------------------------------------------------------------- +export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES)) +export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES)) +#--------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------- + +export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) + +export OFILES_BIN := $(addsuffix .o,$(BINFILES)) \ + $(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \ + $(addsuffix .o,$(T3XFILES)) + +export OFILES := $(OFILES_BIN) $(OFILES_SOURCES) + +export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \ + $(addsuffix .h,$(subst .,_,$(BINFILES))) \ + $(GFXFILES:.t3s=.h) + +export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ + $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ + -I$(CURDIR)/$(BUILD) + +export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) + +export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh) + +ifeq ($(strip $(ICON)),) + icons := $(wildcard *.png) + ifneq (,$(findstring $(TARGET).png,$(icons))) + export APP_ICON := $(TOPDIR)/$(TARGET).png + else + ifneq (,$(findstring icon.png,$(icons))) + export APP_ICON := $(TOPDIR)/icon.png + endif + endif +else + export APP_ICON := $(TOPDIR)/$(ICON) +endif + +ifeq ($(strip $(NO_SMDH)),) + export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh +endif + +ifneq ($(ROMFS),) + export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS) +endif + +.PHONY: all clean + +#--------------------------------------------------------------------------------- +all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES) + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +$(BUILD): + @mkdir -p $@ + +ifneq ($(GFXBUILD),$(BUILD)) +$(GFXBUILD): + @mkdir -p $@ +endif + +ifneq ($(DEPSDIR),$(BUILD)) +$(DEPSDIR): + @mkdir -p $@ +endif + +#--------------------------------------------------------------------------------- +clean: + @echo clean ... + @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD) + +#--------------------------------------------------------------------------------- +$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x + +#--------------------------------------------------------------------------------- +else + +#--------------------------------------------------------------------------------- +# main targets +#--------------------------------------------------------------------------------- +$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS) + +$(OFILES_SOURCES) : $(HFILES) + +$(OUTPUT).elf : $(OFILES) + +#--------------------------------------------------------------------------------- +# you need a rule like this for each extension you use as binary data +#--------------------------------------------------------------------------------- +%.bin.o %_bin.h : %.bin +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +.PRECIOUS : %.t3x +#--------------------------------------------------------------------------------- +%.t3x.o %_t3x.h : %.t3x +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @$(bin2o) + +#--------------------------------------------------------------------------------- +# rules for assembling GPU shaders +#--------------------------------------------------------------------------------- +define shader-as + $(eval CURBIN := $*.shbin) + $(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d) + echo "$(CURBIN).o: $< $1" > $(DEPSFILE) + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h + echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h + echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h + picasso -o $(CURBIN) $1 + bin2s $(CURBIN) | $(AS) -o $*.shbin.o +endef + +%.shbin.o %_shbin.h : %.v.pica %.g.pica + @echo $(notdir $^) + @$(call shader-as,$^) + +%.shbin.o %_shbin.h : %.v.pica + @echo $(notdir $<) + @$(call shader-as,$<) + +%.shbin.o %_shbin.h : %.shlist + @echo $(notdir $<) + @$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file))) + +#--------------------------------------------------------------------------------- +%.t3x %.h : %.t3s +#--------------------------------------------------------------------------------- + @echo $(notdir $<) + @tex3ds -i $< -H $*.h -d $*.d -o $*.t3x + +-include $(DEPSDIR)/*.d + +#--------------------------------------------------------------------------------------- +endif +#--------------------------------------------------------------------------------------- diff --git a/tests/PICA_LITP/source/main.c b/tests/PICA_LITP/source/main.c new file mode 100644 index 00000000..3c2a3f0c --- /dev/null +++ b/tests/PICA_LITP/source/main.c @@ -0,0 +1,128 @@ +#include <3ds.h> +#include +#include +#include "vshader_shbin.h" + +#define CLEAR_COLOR 0x68B0D8FF + +#define DISPLAY_TRANSFER_FLAGS \ + (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ + GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO)) + +static DVLB_s* vshader_dvlb; +static shaderProgram_s program; +static int uLoc_projection; +static C3D_Mtx projection; + +static void sceneInit(void) +{ + // Load the vertex shader, create a shader program and bind it + vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); + shaderProgramInit(&program); + shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]); + C3D_BindProgram(&program); + + // Get the location of the uniforms + uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection"); + + // Configure attributes for use with the vertex shader + // Attribute format and element count are ignored in immediate mode + C3D_AttrInfo* attrInfo = C3D_GetAttrInfo(); + AttrInfo_Init(attrInfo); + AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position + AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color + + // Compute the projection matrix + Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true); + + // Configure the first fragment shading substage to just pass through the vertex color + // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight + C3D_TexEnv* env = C3D_GetTexEnv(0); + C3D_TexEnvInit(env); + C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0); + C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE); +} + +static void sceneRender(void) +{ + // Update the uniforms + C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection); + + // Draw the triangle directly + C3D_ImmDrawBegin(GPU_TRIANGLES); + // Triangle 1 + // This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1) + C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position + C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color + + // This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1) + C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f); + + // This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0) + C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f); + C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f); + + // Triangle 2 + // The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0) + C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 64 / 128 = 0.5 + C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f); + + C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f); + // Output g component should be 128 / 128 = 1.0 + C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f); + + C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f); + // Output g component should be 0 / 128 = 0 + C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f); + C3D_ImmDrawEnd(); +} + +static void sceneExit(void) +{ + // Free the shader program + shaderProgramFree(&program); + DVLB_Free(vshader_dvlb); +} + +int main() +{ + // Initialize graphics + gfxInitDefault(); + C3D_Init(C3D_DEFAULT_CMDBUF_SIZE); + + // Initialize the render target + C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); + C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS); + + // Initialize the scene + sceneInit(); + + // Main loop + while (aptMainLoop()) + { + hidScanInput(); + + // Respond to user input + u32 kDown = hidKeysDown(); + if (kDown & KEY_START) + break; // break in order to return to hbmenu + + // Render the scene + C3D_FrameBegin(C3D_FRAME_SYNCDRAW); + C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0); + C3D_FrameDrawOn(target); + sceneRender(); + C3D_FrameEnd(0); + } + + // Deinitialize the scene + sceneExit(); + + // Deinitialize graphics + C3D_Fini(); + gfxExit(); + return 0; +} \ No newline at end of file diff --git a/tests/PICA_LITP/source/vshader.v.pica b/tests/PICA_LITP/source/vshader.v.pica new file mode 100644 index 00000000..d745f939 --- /dev/null +++ b/tests/PICA_LITP/source/vshader.v.pica @@ -0,0 +1,73 @@ +; Example PICA200 vertex shader + +; Uniforms +.fvec projection[4] + +; Constants +.constf myconst(0.0, 1.0, -1.0, 0.1) +.constf myconst2(0.3, 0.0, 0.0, 0.0) +.alias zeros myconst.xxxx ; Vector full of zeros +.alias ones myconst.yyyy ; Vector full of ones + +.constf magenta(0.8, 0.192, 0.812, 1.0) +.constf cyan(0.137, 0.949, 0.906, 1.0) +.constf lime(0.286, 0.929, 0.412, 1.0) + +.constf normalize_y(1.0, 1.0/128.0, 1.0, 1.0) + +; Outputs +.out outpos position +.out outclr color + +; Inputs (defined as aliases for convenience) +.alias inpos v0 +.alias inclr v1 + +.bool test + +.proc main + ; Force the w component of inpos to be 1.0 + mov r0.xyz, inpos + mov r0.w, ones + + ; outpos = projectionMatrix * inpos + dp4 outpos.x, projection[0], r0 + dp4 outpos.y, projection[1], r0 + dp4 outpos.z, projection[2], r0 + dp4 outpos.w, projection[3], r0 + + ; Test litp via the output fragment colour + ; r1 = input colour + mov r1, inclr + + ; This should perform the following operation: + ; cmp = (x >= 0, w >= 0) + ; dest = ( max(x, 0), clamp(y, -128, +128 ), 0, max(w, 0) ); + litp r2, r1 + + ifc cmp.x + ifc cmp.y + ; cmp.x = 1, cmp.y = 1, write magenta + mov outclr, magenta + end + .else + ; cmp.x = 1, cmp.y = 0, write cyan + mov outclr, cyan + end + .end + .else + ifc cmp.y + ; cmp.x = 0, cmp.y + mov outclr, lime + end + .end + .end + + ; cmp.x 0, cmp.y = 0, write output of litp to out colour, with y normalized to [-1, 1] + mul r2.xyz, normalize_y, r2 + ; Set alpha to one + mov r2.a, ones.a + + mov outclr, r2 + end +.end \ No newline at end of file