Merge remote-tracking branch 'upstream/master' into moar-gpu

This commit is contained in:
wheremyfoodat 2023-08-03 20:26:12 +03:00
commit f84935142a
17 changed files with 698 additions and 62 deletions

View file

@ -91,6 +91,7 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recCMP(const PICAShader& shader, u32 instruction);
void recDP3(const PICAShader& shader, u32 instruction);
void recDP4(const PICAShader& shader, u32 instruction);
void recDPH(const PICAShader& shader, u32 instruction);
void recEMIT(const PICAShader& shader, u32 instruction);
void recEND(const PICAShader& shader, u32 instruction);
void recEX2(const PICAShader& shader, u32 instruction);
@ -111,7 +112,6 @@ class ShaderEmitter : public Xbyak::CodeGenerator {
void recRSQ(const PICAShader& shader, u32 instruction);
void recSETEMIT(const PICAShader& shader, u32 instruction);
void recSGE(const PICAShader& shader, u32 instruction);
void recSGEI(const PICAShader& shader, u32 instruction);
void recSLT(const PICAShader& shader, u32 instruction);
MAKE_LOG_FUNCTION(log, shaderJITLogger)

View file

@ -23,6 +23,7 @@ namespace ShaderOpcodes {
LG2 = 0x06,
LIT = 0x07,
MUL = 0x08,
SGE = 0x09,
SLT = 0x0A,
FLR = 0x0B,
MAX = 0x0C,

View file

@ -13,6 +13,7 @@ namespace ConfigMem {
Datetime0 = 0x1FF81020,
WifiMac = 0x1FF81060,
NetworkState = 0x1FF81067,
SliderState3D = 0x1FF81080,
LedState3D = 0x1FF81084,
BatteryState = 0x1FF81085,
Unknown1086 = 0x1FF81086,

View file

@ -52,6 +52,9 @@ class Kernel {
// Top 8 bits are the major version, bottom 8 are the minor version
u16 kernelVersion = 0;
// Shows whether a reschedule will be need
bool needReschedule = false;
Handle makeArbiter();
Handle makeProcess(u32 id);
Handle makePort(const char* name);
@ -73,7 +76,6 @@ private:
void switchThread(int newThreadIndex);
void sortThreads();
std::optional<int> getNextThread();
void switchToNextThread();
void rescheduleThreads();
bool canThreadRun(const Thread& t);
bool shouldWaitOnObject(KernelObject* object);
@ -126,6 +128,7 @@ private:
void getResourceLimit();
void getResourceLimitLimitValues();
void getResourceLimitCurrentValues();
void getSystemInfo();
void getSystemTick();
void getThreadID();
void getThreadPriority();
@ -168,6 +171,15 @@ public:
void serviceSVC(u32 svc);
void reset();
void requireReschedule() { needReschedule = true; }
void evalReschedule() {
if (needReschedule) {
needReschedule = false;
rescheduleThreads();
}
}
Handle makeObject(KernelObjectType type) {
if (handleCounter > KernelHandles::Max) [[unlikely]] {
Helpers::panic("Hlep we somehow created enough kernel objects to overflow this thing");

View file

@ -143,6 +143,7 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
break;
case ShaderOpcodes::DP3: recDP3(shaderUnit, instruction); break;
case ShaderOpcodes::DP4: recDP4(shaderUnit, instruction); break;
case ShaderOpcodes::DPH: recDPH(shaderUnit, instruction); break;
case ShaderOpcodes::END: recEND(shaderUnit, instruction); break;
case ShaderOpcodes::EX2: recEX2(shaderUnit, instruction); break;
case ShaderOpcodes::FLR: recFLR(shaderUnit, instruction); break;
@ -179,6 +180,10 @@ void ShaderEmitter::compileInstruction(const PICAShader& shaderUnit) {
case ShaderOpcodes::SLTI:
recSLT(shaderUnit, instruction); break;
case ShaderOpcodes::SGE:
case ShaderOpcodes::SGEI:
recSGE(shaderUnit, instruction); break;
default:
Helpers::panic("Shader JIT: Unimplemented PICA opcode %X", opcode);
}
@ -525,6 +530,30 @@ void ShaderEmitter::recDP4(const PICAShader& shader, u32 instruction) {
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recDPH(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);
const u32 src2 = getBits<7, 5>(instruction); // src2 coming first because PICA moment
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
// TODO: Safe multiplication equivalent (Multiplication is not IEEE compliant on the PICA)
loadRegister<1>(src1_xmm, shader, src1, idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, 0, operandDescriptor);
// Attach 1.0 to the w component of src1
if (haveSSE4_1) {
blendps(src1_xmm, xword[rip + onesVector], 0b1000);
} else {
movaps(scratch1, src1_xmm);
unpckhps(scratch1, xword[rip + onesVector]);
unpcklpd(src1_xmm, scratch1);
}
dpps(src1_xmm, src2_xmm, 0b11111111); // 4-lane dot product between the 2 registers, store the result in all lanes of scratch1 similarly to PICA
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recMAX(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);
@ -656,6 +685,24 @@ void ShaderEmitter::recSLT(const PICAShader& shader, u32 instruction) {
storeRegister(src1_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recSGE(const PICAShader& shader, u32 instruction) {
const bool isSGEI = (instruction >> 26) == ShaderOpcodes::SGEI;
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = isSGEI ? getBits<14, 5>(instruction) : getBits<12, 7>(instruction);
const u32 src2 = isSGEI ? getBits<7, 7>(instruction) : getBits<7, 5>(instruction);
const u32 idx = getBits<19, 2>(instruction);
const u32 dest = getBits<21, 5>(instruction);
loadRegister<1>(src1_xmm, shader, src1, isSGEI ? 0 : idx, operandDescriptor);
loadRegister<2>(src2_xmm, shader, src2, isSGEI ? idx : 0, operandDescriptor);
// SSE does not have a cmpgeps instruction so we turn src1 >= src2 to src2 <= src1, result in src2
cmpleps(src2_xmm, src1_xmm);
andps(src2_xmm, xword[rip + onesVector]);
storeRegister(src2_xmm, shader, dest, operandDescriptor);
}
void ShaderEmitter::recCMP(const PICAShader& shader, u32 instruction) {
const u32 operandDescriptor = shader.operandDescriptors[instruction & 0x7f];
const u32 src1 = getBits<12, 7>(instruction);

View file

@ -87,7 +87,7 @@ void Kernel::arbitrateAddress() {
Helpers::panic("ArbitrateAddress: Unimplemented type %s", arbitrationTypeToString(type));
}
rescheduleThreads();
requireReschedule();
}
// Signal up to "threadCount" threads waiting on the arbiter indicated by "waitingAddress"

View file

@ -35,22 +35,15 @@ bool Kernel::signalEvent(Handle handle) {
// Check if there's any thread waiting on this event
if (event->waitlist != 0) {
// One-shot events get cleared once they are acquired by some thread and only wake up 1 thread at a time
wakeupAllThreads(event->waitlist, handle);
event->waitlist = 0; // No threads waiting;
if (event->resetType == ResetType::OneShot) {
int index = wakeupOneThread(event->waitlist, handle); // Wake up one thread with the highest priority
event->waitlist ^= (1ull << index); // Remove thread from waitlist
event->fired = false;
} else {
wakeupAllThreads(event->waitlist, handle);
event->waitlist = 0; // No threads waiting;
}
// We must reschedule our threads if we signalled one. Some games such as FE: Awakening rely on this
// If this does not happen, we can have phenomena such as a thread waiting up a higher priority thread,
// and the higher priority thread just never running
rescheduleThreads();
}
rescheduleThreads();
return true;
}
@ -121,7 +114,6 @@ void Kernel::waitSynchronization1() {
if (!shouldWaitOnObject(object)) {
acquireSyncObject(object, threads[currentThreadIndex]); // Acquire the object since it's ready
regs[0] = Result::Success;
rescheduleThreads();
} else {
// Timeout is 0, don't bother waiting, instantly timeout
if (ns == 0) {
@ -141,7 +133,7 @@ void Kernel::waitSynchronization1() {
// Add the current thread to the object's wait list
object->getWaitlist() |= (1ull << currentThreadIndex);
switchToNextThread();
requireReschedule();
}
}
@ -204,14 +196,13 @@ void Kernel::waitSynchronizationN() {
auto& t = threads[currentThreadIndex];
// We only need to wait on one object. Easy...?!
// We only need to wait on one object. Easy.
if (!waitAll) {
// If there's ready objects, acquire the first one and return
if (oneObjectReady) {
regs[0] = Result::Success;
regs[1] = firstReadyObjectIndex; // Return index of the acquired object
acquireSyncObject(waitObjects[firstReadyObjectIndex].second, t); // Acquire object
rescheduleThreads();
return;
}
@ -229,8 +220,8 @@ void Kernel::waitSynchronizationN() {
waitObjects[i].second->getWaitlist() |= (1ull << currentThreadIndex); // And add the thread to the object's waitlist
}
switchToNextThread();
requireReschedule();
} else {
Helpers::panic("WaitSynchronizatioN with waitAll");
Helpers::panic("WaitSynchronizationN with waitAll");
}
}

View file

@ -50,6 +50,7 @@ void Kernel::serviceSVC(u32 svc) {
case 0x25: waitSynchronizationN(); break;
case 0x27: duplicateHandle(); break;
case 0x28: getSystemTick(); break;
case 0x2A: getSystemInfo(); break;
case 0x2B: getProcessInfo(); break;
case 0x2D: connectToPort(); break;
case 0x32: sendSyncRequest(); break;
@ -61,6 +62,8 @@ void Kernel::serviceSVC(u32 svc) {
case 0x3D: outputDebugString(); break;
default: Helpers::panic("Unimplemented svc: %X @ %08X", svc, regs[15]); break;
}
evalReschedule();
}
void Kernel::setVersion(u8 major, u8 minor) {
@ -140,6 +143,8 @@ void Kernel::reset() {
threadIndices.clear();
serviceManager.reset();
needReschedule = false;
// Allocate handle #0 to a dummy object and make a main process object
makeObject(KernelObjectType::Dummy);
currentProcess = makeProcess(1); // Use ID = 1 for main process
@ -249,6 +254,82 @@ void Kernel::duplicateHandle() {
}
}
namespace SystemInfoType {
enum : u32 {
// Gets information related to Citra (We don't implement this, we just report this emulator is not Citra)
CitraInformation = 0x20000,
// Gets information related to this emulator
PandaInformation = 0x20001,
};
};
namespace CitraInfoType {
enum : u32 {
IsCitra = 0,
BuildName = 10, // (ie: Nightly, Canary).
BuildVersion = 11, // Build version.
BuildDate1 = 20, // Build date first 7 characters.
BuildDate2 = 21, // Build date next 7 characters.
BuildDate3 = 22, // Build date next 7 characters.
BuildDate4 = 23, // Build date last 7 characters.
BuildBranch1 = 30, // Git branch first 7 characters.
BuildBranch2 = 31, // Git branch last 7 characters.
BuildDesc1 = 40, // Git description (commit) first 7 characters.
BuildDesc2 = 41, // Git description (commit) last 7 characters.
};
}
namespace PandaInfoType {
enum : u32 {
IsPanda = 0,
};
}
void Kernel::getSystemInfo() {
const u32 infoType = regs[1];
const u32 subtype = regs[2];
log("GetSystemInfo (type = %X, subtype = %X)\n", infoType, subtype);
regs[0] = Result::Success;
switch (infoType) {
case SystemInfoType::CitraInformation: {
switch (subtype) {
case CitraInfoType::IsCitra:
// Report that we're not Citra
regs[1] = 0;
regs[2] = 0;
break;
default:
Helpers::warn("GetSystemInfo: Unknown CitraInformation subtype %x\n", subtype);
regs[0] = Result::FailurePlaceholder;
break;
}
break;
}
case SystemInfoType::PandaInformation: {
switch (subtype) {
case PandaInfoType::IsPanda:
// This is indeed us, set output to 1
regs[1] = 1;
regs[2] = 0;
break;
default:
Helpers::warn("GetSystemInfo: Unknown PandaInformation subtype %x\n", subtype);
regs[0] = Result::FailurePlaceholder;
break;
}
break;
}
default: Helpers::panic("GetSystemInfo: Unknown system info type: %x (subtype: %x)\n", infoType, subtype); break;
}
}
std::string Kernel::getProcessName(u32 pid) {
if (pid == KernelHandles::CurrentProcess) {
return "current";

View file

@ -76,6 +76,11 @@ void Kernel::sendSyncRequest() {
u32 messagePointer = getTLSPointer() + 0x80; // The message is stored starting at TLS+0x80
logSVC("SendSyncRequest(session handle = %X)\n", handle);
// Service calls via SendSyncRequest and file access needs to put the caller to sleep for a given amount of time
// To make sure that the other threads don't get starved. Various games rely on this (including Sonic Boom: Shattering Crystal it seems)
constexpr u64 syncRequestDelayNs = 39000;
sleepThread(syncRequestDelayNs);
// The sync request is being sent at a service rather than whatever port, so have the service manager intercept it
if (KernelHandles::isServiceHandle(handle)) {
// The service call might cause a reschedule and change threads. Hence, set r0 before executing the service call

View file

@ -82,32 +82,26 @@ std::optional<int> Kernel::getNextThread() {
return std::nullopt;
}
void Kernel::switchToNextThread() {
std::optional<int> newThreadIndex = getNextThread();
if (!newThreadIndex.has_value()) {
log("Kernel tried to switch to the next thread but none found. Switching to random thread\n");
assert(aliveThreadCount != 0);
Helpers::panic("rpog");
int index;
do {
index = rand() % threadCount;
} while (threads[index].status == ThreadStatus::Dead); // TODO: Pray this doesn't hang
switchThread(index);
} else {
switchThread(newThreadIndex.value());
}
}
// See if there;s a higher priority, ready thread and switch to that
// See if there is a higher priority, ready thread and switch to that
void Kernel::rescheduleThreads() {
Thread& current = threads[currentThreadIndex]; // Current running thread
// If the current thread is running and hasn't gone to sleep or whatever, set it to Ready instead of Running
// So that getNextThread will evaluate it properly
if (current.status == ThreadStatus::Running) {
current.status = ThreadStatus::Ready;
}
ThreadStatus currentStatus = current.status;
std::optional<int> newThreadIndex = getNextThread();
if (newThreadIndex.has_value() && newThreadIndex.value() != currentThreadIndex) {
threads[currentThreadIndex].status = ThreadStatus::Ready;
// Case 1: A thread can run
if (newThreadIndex.has_value()) {
switchThread(newThreadIndex.value());
}
// Case 2: No other thread can run, straight to the idle thread
else {
switchThread(idleThreadIndex);
}
}
@ -184,6 +178,7 @@ void Kernel::releaseMutex(Mutex* moo) {
// If the lock count reached 0 then the thread no longer owns the mootex and it can be given to a new one
if (moo->lockCount == 0) {
moo->locked = false;
if (moo->waitlist != 0) {
int index = wakeupOneThread(moo->waitlist, moo->handle); // Wake up one thread and get its index
moo->waitlist ^= (1ull << index); // Remove thread from waitlist
@ -194,7 +189,7 @@ void Kernel::releaseMutex(Mutex* moo) {
moo->ownerThread = index;
}
rescheduleThreads();
requireReschedule();
}
}
@ -210,7 +205,7 @@ void Kernel::sleepThreadOnArbiter(u32 waitingAddress) {
t.status = ThreadStatus::WaitArbiter;
t.waitingAddress = waitingAddress;
switchToNextThread();
requireReschedule();
}
// Acquires an object that is **ready to be acquired** without waiting on it
@ -226,7 +221,13 @@ void Kernel::acquireSyncObject(KernelObject* object, const Thread& thread) {
case KernelObjectType::Mutex: {
Mutex* moo = object->getData<Mutex>();
moo->locked = true; // Set locked to true, whether it's false or not because who cares
// Only reschedule if we're acquiring the mutex for the first time
if (!moo->locked) {
moo->locked = true;
requireReschedule();
}
// Increment lock count by 1. If a thread acquires a mootex multiple times, it needs to release it until count == 0
// For the mootex to be free.
moo->lockCount++;
@ -338,20 +339,31 @@ void Kernel::wakeupAllThreads(u64 waitlist, Handle handle) {
void Kernel::sleepThread(s64 ns) {
if (ns < 0) {
Helpers::panic("Sleeping a thread for a negative amount of ns");
} else if (ns == 0) { // Used when we want to force a thread switch
std::optional<int> newThreadIndex = getNextThread();
// If there's no other thread waiting, don't bother yielding
if (newThreadIndex.has_value()) {
threads[currentThreadIndex].status = ThreadStatus::Ready;
switchThread(newThreadIndex.value());
}
} else { // If we're sleeping for > 0 ns
} else if (ns == 0) {
// TODO: This is garbage, but it works so eh we can keep it for now
Thread& t = threads[currentThreadIndex];
// See if a thread other than this and the idle thread is waiting to run by temp marking the current function as dead and searching
// If there is another thread to run, then run it. Otherwise, go back to this thread, not to the idle thread
t.status = ThreadStatus::Dead;
auto nextThreadIndex = getNextThread();
t.status = ThreadStatus::Ready;
if (nextThreadIndex.has_value()) {
const auto index = nextThreadIndex.value();
if (index != idleThreadIndex) {
switchThread(index);
}
}
} else { // If we're sleeping for >= 0 ns
Thread& t = threads[currentThreadIndex];
t.status = ThreadStatus::WaitSleep;
t.waitingNanoseconds = ns;
t.sleepTick = cpu.getTicks();
switchToNextThread();
requireReschedule();
}
}
@ -374,7 +386,7 @@ void Kernel::createThread() {
regs[0] = Result::Success;
regs[1] = makeThread(entrypoint, initialSP, priority, id, arg, ThreadStatus::Ready);
rescheduleThreads();
requireReschedule();
}
// void SleepThread(s64 nanoseconds)
@ -448,7 +460,7 @@ void Kernel::setThreadPriority() {
}
}
sortThreads();
rescheduleThreads();
requireReschedule();
}
void Kernel::exitThread() {
@ -472,7 +484,7 @@ void Kernel::exitThread() {
t.threadsWaitingForTermination = 0; // No other threads waiting
}
switchToNextThread();
requireReschedule();
}
void Kernel::svcCreateMutex() {

View file

@ -135,6 +135,10 @@ u32 Memory::read32(u32 vaddr) {
case ConfigMem::SyscoreVer: return 2;
case 0x1FF81000: return 0; // TODO: Figure out what this config mem address does
case ConfigMem::WifiMac: return 0xFF07F440; // Wifi MAC: First 4 bytes of MAC Address
// 3D slider. Float in range 0.0 = off, 1.0 = max.
case ConfigMem::SliderState3D: return Helpers::bit_cast<u32, float>(0.0f);
default:
if (vaddr >= VirtualAddrs::VramStart && vaddr < VirtualAddrs::VramStart + VirtualAddrs::VramSize) {
Helpers::warn("VRAM read!\n");

View file

@ -357,6 +357,8 @@ void Emulator::run() {
hid.updateInputs(cpu.getTicks());
}
// TODO: Should this be uncommented?
// kernel.evalReschedule();
// Update inputs in the HID module
SDL_GL_SwapWindow(window);

View file

@ -228,10 +228,18 @@ void calcLighting(out vec4 primary_color, out vec4 secondary_color) {
decodeFP(bitfieldExtract(GPUREG_LIGHTi_VECTOR_HIGH, 0, 16), 5, 10)
));
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) error_unimpl = true;
vec3 half_vector;
vec3 half_vector = normalize(normalize(light_vector) + view);
// Positional Light
if (bitfieldExtract(GPUREG_LIGHTi_CONFIG, 0, 1) == 0) {
error_unimpl = true;
// half_vector = normalize(normalize(light_vector + v_view) + view);
}
// Directional light
else {
half_vector = normalize(normalize(light_vector) + view);
}
for (int c = 0; c < 7; c++) {
if (bitfieldExtract(GPUREG_LIGHTING_CONFIG1, 16 + c, 1) == 0) {

View file

@ -0,0 +1,255 @@
#---------------------------------------------------------------------------------
.SUFFIXES:
#---------------------------------------------------------------------------------
ifeq ($(strip $(DEVKITARM)),)
$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
endif
TOPDIR ?= $(CURDIR)
include $(DEVKITARM)/3ds_rules
#---------------------------------------------------------------------------------
# TARGET is the name of the output
# BUILD is the directory where object files & intermediate files will be placed
# SOURCES is a list of directories containing source code
# DATA is a list of directories containing data files
# INCLUDES is a list of directories containing header files
# GRAPHICS is a list of directories containing graphics files
# GFXBUILD is the directory where converted graphics files will be placed
# If set to $(BUILD), it will statically link in the converted
# files as if they were data files.
#
# NO_SMDH: if set to anything, no SMDH file is generated.
# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional)
# APP_TITLE is the name of the app stored in the SMDH file (Optional)
# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
# ICON is the filename of the icon (.png), relative to the project folder.
# If not set, it attempts to use one of the following (in this order):
# - <Project name>.png
# - icon.png
# - <libctru folder>/default_icon.png
#---------------------------------------------------------------------------------
TARGET := $(notdir $(CURDIR))
BUILD := build
SOURCES := source
DATA := data
INCLUDES := include
GRAPHICS := gfx
GFXBUILD := $(BUILD)
#ROMFS := romfs
#GFXBUILD := $(ROMFS)/gfx
#---------------------------------------------------------------------------------
# options for code generation
#---------------------------------------------------------------------------------
ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft
CFLAGS := -g -Wall -O2 -mword-relocations \
-ffunction-sections \
$(ARCH)
CFLAGS += $(INCLUDE) -D__3DS__
CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
ASFLAGS := -g $(ARCH)
LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
LIBS := -lcitro3d -lctru -lm
#---------------------------------------------------------------------------------
# list of directories containing libraries, this must be the top level containing
# include and lib
#---------------------------------------------------------------------------------
LIBDIRS := $(CTRULIB)
#---------------------------------------------------------------------------------
# no real need to edit anything past this point unless you need to add additional
# rules for different file extensions
#---------------------------------------------------------------------------------
ifneq ($(BUILD),$(notdir $(CURDIR)))
#---------------------------------------------------------------------------------
export OUTPUT := $(CURDIR)/$(TARGET)
export TOPDIR := $(CURDIR)
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
$(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir)) \
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
export DEPSDIR := $(CURDIR)/$(BUILD)
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica)))
SHLISTFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.shlist)))
GFXFILES := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.t3s)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
# use CXX for linking C++ projects, CC for standard C
#---------------------------------------------------------------------------------
ifeq ($(strip $(CPPFILES)),)
#---------------------------------------------------------------------------------
export LD := $(CC)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export LD := $(CXX)
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
#---------------------------------------------------------------------------------
ifeq ($(GFXBUILD),$(BUILD))
#---------------------------------------------------------------------------------
export T3XFILES := $(GFXFILES:.t3s=.t3x)
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
export ROMFS_T3XFILES := $(patsubst %.t3s, $(GFXBUILD)/%.t3x, $(GFXFILES))
export T3XHFILES := $(patsubst %.t3s, $(BUILD)/%.h, $(GFXFILES))
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
export OFILES_SOURCES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export OFILES_BIN := $(addsuffix .o,$(BINFILES)) \
$(PICAFILES:.v.pica=.shbin.o) $(SHLISTFILES:.shlist=.shbin.o) \
$(addsuffix .o,$(T3XFILES))
export OFILES := $(OFILES_BIN) $(OFILES_SOURCES)
export HFILES := $(PICAFILES:.v.pica=_shbin.h) $(SHLISTFILES:.shlist=_shbin.h) \
$(addsuffix .h,$(subst .,_,$(BINFILES))) \
$(GFXFILES:.t3s=.h)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-I$(CURDIR)/$(BUILD)
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
export _3DSXDEPS := $(if $(NO_SMDH),,$(OUTPUT).smdh)
ifeq ($(strip $(ICON)),)
icons := $(wildcard *.png)
ifneq (,$(findstring $(TARGET).png,$(icons)))
export APP_ICON := $(TOPDIR)/$(TARGET).png
else
ifneq (,$(findstring icon.png,$(icons)))
export APP_ICON := $(TOPDIR)/icon.png
endif
endif
else
export APP_ICON := $(TOPDIR)/$(ICON)
endif
ifeq ($(strip $(NO_SMDH)),)
export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
endif
ifneq ($(ROMFS),)
export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS)
endif
.PHONY: all clean
#---------------------------------------------------------------------------------
all: $(BUILD) $(GFXBUILD) $(DEPSDIR) $(ROMFS_T3XFILES) $(T3XHFILES)
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
$(BUILD):
@mkdir -p $@
ifneq ($(GFXBUILD),$(BUILD))
$(GFXBUILD):
@mkdir -p $@
endif
ifneq ($(DEPSDIR),$(BUILD))
$(DEPSDIR):
@mkdir -p $@
endif
#---------------------------------------------------------------------------------
clean:
@echo clean ...
@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(GFXBUILD)
#---------------------------------------------------------------------------------
$(GFXBUILD)/%.t3x $(BUILD)/%.h : %.t3s
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@tex3ds -i $< -H $(BUILD)/$*.h -d $(DEPSDIR)/$*.d -o $(GFXBUILD)/$*.t3x
#---------------------------------------------------------------------------------
else
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
$(OUTPUT).3dsx : $(OUTPUT).elf $(_3DSXDEPS)
$(OFILES_SOURCES) : $(HFILES)
$(OUTPUT).elf : $(OFILES)
#---------------------------------------------------------------------------------
# you need a rule like this for each extension you use as binary data
#---------------------------------------------------------------------------------
%.bin.o %_bin.h : %.bin
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
.PRECIOUS : %.t3x
#---------------------------------------------------------------------------------
%.t3x.o %_t3x.h : %.t3x
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@$(bin2o)
#---------------------------------------------------------------------------------
# rules for assembling GPU shaders
#---------------------------------------------------------------------------------
define shader-as
$(eval CURBIN := $*.shbin)
$(eval DEPSFILE := $(DEPSDIR)/$*.shbin.d)
echo "$(CURBIN).o: $< $1" > $(DEPSFILE)
echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
picasso -o $(CURBIN) $1
bin2s $(CURBIN) | $(AS) -o $*.shbin.o
endef
%.shbin.o %_shbin.h : %.v.pica %.g.pica
@echo $(notdir $^)
@$(call shader-as,$^)
%.shbin.o %_shbin.h : %.v.pica
@echo $(notdir $<)
@$(call shader-as,$<)
%.shbin.o %_shbin.h : %.shlist
@echo $(notdir $<)
@$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)$(file)))
#---------------------------------------------------------------------------------
%.t3x %.h : %.t3s
#---------------------------------------------------------------------------------
@echo $(notdir $<)
@tex3ds -i $< -H $*.h -d $*.d -o $*.t3x
-include $(DEPSDIR)/*.d
#---------------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------------

View file

@ -0,0 +1,181 @@
#include <3ds.h>
#include <citro3d.h>
#include <string.h>
#include "vshader_shbin.h"
#define CLEAR_COLOR 0x68B0D8FF
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
typedef struct { float x, y, z; } vertex;
static const vertex vertex_list[] = {
{ 200.0f, 200.0f, 0.5f },
{ 100.0f, 40.0f, 0.5f },
{ 300.0f, 40.0f, 0.5f },
};
typedef enum {
Platform_Citra,
Platform_Panda,
Platform_Other
} Platform;
typedef enum {
SystemInfo_Citra = 0x20000,
SystemInfo_Panda = 0x20001
} SystemInfoType;
typedef enum {
SystemInfoSub_IsCitra = 0,
SystemInfoSub_IsPanda = 0,
} SystemInfoSubType;
// Detect the emulator this is running on
Platform getPlatform() {
s64 out;
// First, attempt to detect Citra
Result res = svcGetSystemInfo(&out, SystemInfo_Citra, SystemInfoSub_IsCitra);
if (R_SUCCEEDED(res) && out == 1) {
return Platform_Citra;
}
// Next, attempt to detect Panda3DS
res = svcGetSystemInfo(&out, SystemInfo_Panda, SystemInfoSub_IsPanda);
if (R_SUCCEEDED(res) && out == 1) {
return Platform_Panda;
}
// Unknown platform, maybe a console or another emulator
return Platform_Other;
}
// Print string in emulator terminal
static void emuPrint(const char* str) {
svcOutputDebugString(str, strlen(str));
}
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static C3D_Mtx projection;
static void* vbo_data;
static void sceneInit(void) {
// Load the vertex shader, create a shader program and bind it
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
C3D_BindProgram(&program);
// Get the location of the uniforms
uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
// Configure attributes for use with the vertex shader
C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
AttrInfo_Init(attrInfo);
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
AttrInfo_AddFixed(attrInfo, 1); // v1=color
// Set the fixed attribute (color) to a colour depending on the emulator
Platform platform = getPlatform();
switch (platform) {
case Platform_Citra:
emuPrint("Detected Citra\n");
C3D_FixedAttribSet(1, 1.0, 1.0, 0.0, 1.0);
break;
case Platform_Panda:
emuPrint("Detected Panda3DS\n");
C3D_FixedAttribSet(1, 1.0, 0.0, 0.0, 1.0);
break;
default:
emuPrint("Unknown platform. Probably a real 3DS\n");
C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0);
break;
}
// Compute the projection matrix
Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
// Create the VBO (vertex buffer object)
vbo_data = linearAlloc(sizeof(vertex_list));
memcpy(vbo_data, vertex_list, sizeof(vertex_list));
// Configure buffers
C3D_BufInfo* bufInfo = C3D_GetBufInfo();
BufInfo_Init(bufInfo);
BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, 0, 0);
C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
}
static void sceneRender(void) {
// Update the uniforms
C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
// Draw the VBO
C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
}
static void sceneExit(void) {
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
}
int main() {
emuPrint("Entering main\n");
// Initialize graphics
gfxInitDefault();
C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
// Initialize the render target
C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
// Initialize the scene
sceneInit();
// Main loop
while (true)
{
// Render the scene
emuPrint("Entering C3D_FrameBegin");
C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
emuPrint("Clearing render target");
C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
emuPrint("Calling C3D_FrameDrawOn");
C3D_FrameDrawOn(target);
emuPrint("Calling sceneRender");
sceneRender();
emuPrint("Entering C3D_FrameEnd");
C3D_FrameEnd(0);
emuPrint("Exited C3D_FrameEnd");
}
// Deinitialize the scene
sceneExit();
// Deinitialize graphics
C3D_Fini();
gfxExit();
return 0;
}

View file

@ -0,0 +1,36 @@
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.proc main
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = inclr
mov outclr, inclr
; We're finished
end
.end

View file

@ -45,7 +45,7 @@ static void sceneInit(void)
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
AttrInfo_AddFixed(attrInfo, 1); // v1=color
// Set the fixed attribute (color) to solid white
// Set the fixed attribute (color) to orange
C3D_FixedAttribSet(1, 1.0, 0.5, 0.2, 1.0);
// Compute the projection matrix