PICA: Add LITP test + interpreter implementation

This commit is contained in:
wheremyfoodat 2024-08-02 22:53:51 +03:00
parent eacd9fdbff
commit 195f3388e9
5 changed files with 490 additions and 1 deletions

View file

@ -23,7 +23,7 @@ namespace ShaderOpcodes {
DST = 0x04,
EX2 = 0x05,
LG2 = 0x06,
LIT = 0x07,
LITP = 0x07,
MUL = 0x08,
SGE = 0x09,
SLT = 0x0A,
@ -161,6 +161,7 @@ class PICAShader {
void jmpc(u32 instruction);
void jmpu(u32 instruction);
void lg2(u32 instruction);
void litp(u32 instruction);
void loop(u32 instruction);
void mad(u32 instruction);
void madi(u32 instruction);

View file

@ -74,6 +74,9 @@ void PICAShader::run() {
break;
}
// Undocumented, implementation based on 3DBrew and hw testing (see tests/PICA_LITP)
case ShaderOpcodes::LITP: [[unlikely]] litp(instruction); break;
default: Helpers::panic("Unimplemented PICA instruction %08X (Opcode = %02X)", instruction, opcode);
}
@ -753,4 +756,33 @@ void PICAShader::jmpu(u32 instruction) {
if (((boolUniform >> bit) & 1) == test) // Jump if the bool uniform is the value we want
pc = dest;
}
void PICAShader::litp(u32 instruction) {
const u32 operandDescriptor = operandDescriptors[instruction & 0x7f];
u32 src = getBits<12, 7>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
src = getIndexedSource(src, idx);
vec4f srcVec = getSourceSwizzled<1>(src, operandDescriptor);
vec4f& destVector = getDest(dest);
// Compare registers are set based on whether src.x and src.w are >= 0.0
cmpRegister[0] = (srcVec[0].toFloat32() >= 0.0f);
cmpRegister[1] = (srcVec[3].toFloat32() >= 0.0f);
vec4f result;
// TODO: Does max here have the same non-IEEE NaN behavior as the max instruction?
result[0] = f24::fromFloat32(std::max(srcVec[0].toFloat32(), 0.0f));
result[1] = f24::fromFloat32(std::clamp(srcVec[1].toFloat32(), -127.9961f, 127.9961f));
result[2] = f24::zero();
result[3] = f24::fromFloat32(std::max(srcVec[3].toFloat32(), 0.0f));
u32 componentMask = operandDescriptor & 0xf;
for (int i = 0; i < 4; i++) {
if (componentMask & (1 << i)) {
destVector[3 - i] = result[3 - i];
}
}
}

255
tests/PICA_LITP/Makefile Normal file
View file

@ -0,0 +1,255 @@
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif
TOPDIR ?= $(CURDIR)
include $(DEVKITARM)/3ds_rules
#---------------------------------------------------------------------------------
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing header files
# GRAPHICS is a list of directories containing graphics files
# GFXBUILD is the directory where converted graphics files will be placed
# If set to $(BUILD), it will statically link in the converted
# files as if they were data files.
#
# NO_SMDH: if set to anything, no SMDH file is generated.
# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional)
# APP_TITLE is the name of the app stored in the SMDH file (Optional)
# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
# ICON is the filename of the icon (.png), relative to the project folder.
# If not set, it attempts to use one of the following (in this order):
# - <Project name>.png
# - icon.png
# - <libctru folder>/default_icon.png
#---------------------------------------------------------------------------------
TARGET := $(notdir $(CURDIR))
BUILD := build
SOURCES := source
DATA := data
INCLUDES := include
GRAPHICS := gfx
GFXBUILD := $(BUILD)
#ROMFS := romfs
#GFXBUILD := $(ROMFS)/gfx
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft
CFLAGS := -g -Wall -O2 -mword-relocations \
-ffunction-sections \
$(ARCH)
CFLAGS += $(INCLUDE) -D__3DS__
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lcitro3d -lctru -lm
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS := $(CTRULIB)
#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR)))
#---------------------------------------------------------------------------------
export OUTPUT := $(CURDIR)/$(TARGET)
export TOPDIR := $(CURDIR)
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
$(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica)))
SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist)))
GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
ifeq ($(strip $(CPPFILES)),)
#---------------------------------------------------------------------------------
export LD := $(CC)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export LD := $(CXX)
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
ifeq ($(GFXBUILD),$(BUILD))
#---------------------------------------------------------------------------------
export T3XFILES := $(GFXFILES:.t3s=.t3x)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES))
export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES))
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export OFILES_BIN := $(addsuffix .o,$(BINFILES)) \
$(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \
$(addsuffix .o,$(T3XFILES))
export OFILES := $(OFILES_BIN) $(OFILES_SOURCES)
export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \
$(addsuffix .h,$(subst .,_,$(BINFILES))) \
$(GFXFILES:.t3s=.h)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-I$(CURDIR)/$(BUILD)
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh)
ifeq ($(strip $(ICON)),)
icons := $(wildcard *.png)
ifneq (,$(findstring $(TARGET).png,$(icons)))
export APP_ICON := $(TOPDIR)/$(TARGET).png
else
ifneq (,$(findstring icon.png,$(icons)))
export APP_ICON := $(TOPDIR)/icon.png
endif
endif
else
export APP_ICON := $(TOPDIR)/$(ICON)
endif
ifeq ($(strip $(NO_SMDH)),)
export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
endif
ifneq ($(ROMFS),)
export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS)
endif
.PHONY: all clean
#---------------------------------------------------------------------------------
all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES)
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
$(BUILD):
@mkdir -p $@
ifneq ($(GFXBUILD),$(BUILD))
$(GFXBUILD):
@mkdir -p $@
endif
ifneq ($(DEPSDIR),$(BUILD))
$(DEPSDIR):
@mkdir -p $@
endif
#---------------------------------------------------------------------------------
clean:
@echo clean ...
@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD)
#---------------------------------------------------------------------------------
$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS)
$(OFILES_SOURCES) : $(HFILES)
$(OUTPUT).elf : $(OFILES)
#---------------------------------------------------------------------------------
# you need a rule like this for each extension you use as binary data
#---------------------------------------------------------------------------------
%.bin.o %_bin.h : %.bin
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
.PRECIOUS : %.t3x
#---------------------------------------------------------------------------------
%.t3x.o %_t3x.h : %.t3x
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
# rules for assembling GPU shaders
#---------------------------------------------------------------------------------
define shader-as
$(eval CURBIN := $*.shbin)
$(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d)
echo "$(CURBIN).o: $< $1" > $(DEPSFILE)
echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
picasso -o $(CURBIN) $1
bin2s $(CURBIN) | $(AS) -o $*.shbin.o
endef
%.shbin.o %_shbin.h : %.v.pica %.g.pica
@echo $(notdir $^)
@$(call shader-as,$^)
%.shbin.o %_shbin.h : %.v.pica
@echo $(notdir $<)
@$(call shader-as,$<)
%.shbin.o %_shbin.h : %.shlist
@echo $(notdir $<)
@$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file)))
#---------------------------------------------------------------------------------
%.t3x %.h : %.t3s
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@tex3ds -i $< -H $*.h -d $*.d -o $*.t3x
-include $(DEPSDIR)/*.d
#---------------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------------

View file

@ -0,0 +1,128 @@
#include <3ds.h>
#include <citro3d.h>
#include <string.h>
#include "vshader_shbin.h"
#define CLEAR_COLOR 0x68B0D8FF
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static C3D_Mtx projection;
static void sceneInit(void)
{
// Load the vertex shader, create a shader program and bind it
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
C3D_BindProgram(&program);
// Get the location of the uniforms
uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
// Configure attributes for use with the vertex shader
// Attribute format and element count are ignored in immediate mode
C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
AttrInfo_Init(attrInfo);
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 3); // v1=color
// Compute the projection matrix
Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
}
static void sceneRender(void)
{
// Update the uniforms
C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
// Draw the triangle directly
C3D_ImmDrawBegin(GPU_TRIANGLES);
// Triangle 1
// This vertex has r >= 0 and a >= 0 so the shader should output magenta (cmp.x = cmp.y = 1)
C3D_ImmSendAttrib(200.0f, 200.0f, 0.5f, 0.0f); // v0=position
C3D_ImmSendAttrib(1.0f, 0.0f, 0.0f, 1.0f); // v1=color
// This vertex only has a >= 0, so the shader should output lime (cmp.x = 0, cmp.y = 1)
C3D_ImmSendAttrib(100.0f, 40.0f, 0.5f, 0.0f);
C3D_ImmSendAttrib(-0.5f, 1.0f, 0.0f, 1.0f);
// This vertex only has r >= 0, so the shader should output cyan (cmp.x = 1, cmp.y = 0)
C3D_ImmSendAttrib(300.0f, 40.0f, 0.5f, 0.0f);
C3D_ImmSendAttrib(0.5f, 0.0f, 1.0f, -1.0f);
// Triangle 2
// The next 3 vertices have r < 0, a < 0, so the output of the shader should be the output of litp with alpha set to 1 (cmp.x = cmp.y = 0)
C3D_ImmSendAttrib(10.0f, 20.0f, 0.5f, 0.0f);
// Output g component should be 64 / 128 = 0.5
C3D_ImmSendAttrib(-1.0f, 64.0f, 0.0f, -1.0f);
C3D_ImmSendAttrib(90.0f, 20.0f, 0.5f, 0.0f);
// Output g component should be 128 / 128 = 1.0
C3D_ImmSendAttrib(-1.0f, 256.0f, 1.0f, -1.0f);
C3D_ImmSendAttrib(40.0f, 40.0f, 0.5f, 0.0f);
// Output g component should be 0 / 128 = 0
C3D_ImmSendAttrib(-1.0f, 0.0f, 0.5f, -1.0f);
C3D_ImmDrawEnd();
}
static void sceneExit(void)
{
// Free the shader program
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
}
int main()
{
// Initialize graphics
gfxInitDefault();
C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
// Initialize the render target
C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
// Initialize the scene
sceneInit();
// Main loop
while (aptMainLoop())
{
hidScanInput();
// Respond to user input
u32 kDown = hidKeysDown();
if (kDown & KEY_START)
break; // break in order to return to hbmenu
// Render the scene
C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
C3D_FrameDrawOn(target);
sceneRender();
C3D_FrameEnd(0);
}
// Deinitialize the scene
sceneExit();
// Deinitialize graphics
C3D_Fini();
gfxExit();
return 0;
}

View file

@ -0,0 +1,73 @@
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
.constf magenta(0.8, 0.192, 0.812, 1.0)
.constf cyan(0.137, 0.949, 0.906, 1.0)
.constf lime(0.286, 0.929, 0.412, 1.0)
.constf normalize_y(1.0, 1.0/128.0, 1.0, 1.0)
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.bool test
.proc main
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; Test litp via the output fragment colour
; r1 = input colour
mov r1, inclr
; This should perform the following operation:
; cmp = (x >= 0, w >= 0)
; dest = ( max(x, 0), clamp(y, -128, +128 ), 0, max(w, 0) );
litp r2, r1
ifc cmp.x
ifc cmp.y
; cmp.x = 1, cmp.y = 1, write magenta
mov outclr, magenta
end
.else
; cmp.x = 1, cmp.y = 0, write cyan
mov outclr, cyan
end
.end
.else
ifc cmp.y
; cmp.x = 0, cmp.y
mov outclr, lime
end
.end
.end
; cmp.x 0, cmp.y = 0, write output of litp to out colour, with y normalized to [-1, 1]
mul r2.xyz, normalize_y, r2
; Set alpha to one
mov r2.a, ones.a
mov outclr, r2
end
.end