Skip to content

Commit

Permalink
SPU LLVM Precompilation
Browse files Browse the repository at this point in the history
  • Loading branch information
elad335 committed Aug 27, 2023
1 parent a6a9026 commit c3c1e91
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 51 deletions.
9 changes: 8 additions & 1 deletion rpcs3/Emu/Cell/PPUModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,13 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&

if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u)
{
if (prog.p_vaddr && !mod.cache.empty())
{
extern void emplace_spu_data_section(u32 vaddr, const void* ls_data_vaddr, u32 size);

emplace_spu_data_section(prog.p_vaddr, (elf_header + prog.p_offset), prog.p_filesz);
}

sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz);
}

Expand All @@ -1119,7 +1126,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment&

if (!name.empty())
{
fmt::append(dump, "\n\tSPUNAME: '%s'", name);
fmt::append(dump, "\n\tSPUNAME: '%s' (image addr: 0x%x)", name, seg.addr + i);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/Cell/PPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4030,7 +4030,7 @@ extern void ppu_initialize()

const std::string mount_point = vfs::get("/dev_flash/");

bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.ppu_llvm_precompilation;
bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.llvm_precompilation;

if (compile_fw || dev_flash_located)
{
Expand All @@ -4050,7 +4050,7 @@ extern void ppu_initialize()
}

// Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO
if (compile_main && g_cfg.core.ppu_llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess())
if (compile_main && g_cfg.core.llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess())
{
// Try to add all related directories
const std::set<std::string> dirs = Emu.GetGameDirs();
Expand Down
199 changes: 159 additions & 40 deletions rpcs3/Emu/Cell/SPURecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,65 @@ spu_cache::~spu_cache()
{
}

// Collection of SPU data sections registered through emplace_spu_data_section(),
// later moved out and consumed by spu_cache::initialize().
struct spu_section_data
{
// One registered section.
// NOTE: member order matters, objects are built with aggregate initialization.
struct data_t
{
// Address the section was registered at (offset of its first word in the fake LS image)
u32 vaddr;
// Section contents copied as 32-bit words (size rounded down to a multiple of 4 bytes)
std::vector<u32> insts;
// Function addresses returned by spu_thread::recover_functions() for this section
std::vector<u32> funcs;
};

// Held while appending to `data`
shared_mutex mtx;
// All sections registered so far
std::vector<data_t> data;
};

// Register a section of an SPU image so that the functions found in it can be precompiled later.
//
// vaddr         - address of the section inside the SPU_LS_SIZE-byte image, must be 4-byte aligned
// ls_data_vaddr - pointer to the section bytes, `size` bytes (rounded down to a multiple of 4) are read
// size          - section size in bytes
//
// Nothing is registered when the section is misaligned, empty, does not fit inside the image,
// when LLVM precompilation is disabled or when no function could be recovered from the section.
extern void emplace_spu_data_section(u32 vaddr, const void* ls_data_vaddr, u32 size)
{
	if (vaddr % 4)
	{
		return;
	}

	// Only whole 32-bit words are taken
	size &= -4;

	// Deliberately not written as `vaddr + size > SPU_LS_SIZE`: that sum is computed in 32 bits and
	// may wrap around, which would let an out-of-range vaddr through to the memcpy below
	if (!size || vaddr >= SPU_LS_SIZE || size > SPU_LS_SIZE - vaddr)
	{
		return;
	}

	if (!g_cfg.core.llvm_precompilation)
	{
		return;
	}

	g_fxo->need<spu_section_data>();

	// Build a zero-filled image containing only this section and search it for functions
	std::vector<u8> ls_data(SPU_LS_SIZE);
	std::memcpy(ls_data.data() + vaddr, ls_data_vaddr, size);

	std::vector<u32> funcs = spu_thread::recover_functions(ls_data.data(), umax);

	if (funcs.empty())
	{
		// Nothing to add
		return;
	}

	for (u32 addr : funcs)
	{
		spu_log.success("Found SPU function at: 0x%08x", addr);
	}

	// Copy the section words only now that the section is known to be useful
	std::vector<u32> insts(size / 4);
	std::memcpy(insts.data(), ls_data_vaddr, size);

	spu_section_data::data_t obj{vaddr, std::move(insts), std::move(funcs)};

	auto& sections = g_fxo->get<spu_section_data>();

	std::lock_guard lock(sections.mtx);
	sections.data.emplace_back(std::move(obj));
}

std::deque<spu_program> spu_cache::get()
{
std::deque<spu_program> result;
Expand Down Expand Up @@ -618,7 +677,8 @@ void spu_cache::initialize()
atomic_t<usz> fnext{};
atomic_t<u8> fail_flag{0};

auto entry_list = cache.get();
auto data_list = std::move(g_fxo->get<spu_section_data>().data);

atomic_t<usz> enext{};

if (g_cfg.core.spu_decoder == spu_decoder_type::dynamic || g_cfg.core.spu_decoder == spu_decoder_type::llvm)
Expand Down Expand Up @@ -660,7 +720,18 @@ void spu_cache::initialize()
thread_ctrl::wait_on(g_progr_ptotal, v);
}

g_progr_ptotal += ::size32(func_list);
u32 add_size = ::size32(func_list);

if (func_list.empty())
{
for (auto& sec : data_list)
{
add_size += sec.funcs.size();
}
}

g_progr_ptotal += add_size;

progr.emplace("Building SPU cache...");

worker_count = rpcs3::utils::get_max_threads();
Expand Down Expand Up @@ -747,6 +818,7 @@ void spu_cache::initialize()
{
// Likely, out of JIT memory. Signal to prevent further building.
fail_flag |= 1;
continue;
}

// Clear fake LS
Expand All @@ -755,63 +827,96 @@ void spu_cache::initialize()
result++;
}

for (usz func_i = enext++; func_i < func_list.size(); func_i = enext++, g_progr_pdone++)
if (!func_list.empty() || !g_cfg.core.llvm_precompilation)
{
const spu_program& func = std::as_const(func_list)[func_i];
// Cache has already been initiated or the user does not want to precompile SPU programs
break;
}

if (Emu.IsStopped() || fail_flag)
{
continue;
}
for (usz func_i = enext++;; func_i = enext++, g_progr_pdone++)
{
bool is_new_section = false;
bool is_end_section = false;
u32 add_size = 0;

// Get data start
const u32 start = func.lower_bound;
const u32 size0 = ::size32(func.data);
u32 func_addr = 0;
u32 sec_addr = umax;
std::vector<u32>* inst_data{};

be_t<u64> hash_start;
for (auto& sec : data_list)
{
sha1_context ctx;
u8 output[20];
if (func_i < add_size + sec.funcs.size())
{
is_new_section = func_i == add_size;
is_end_section = func_i == add_size + sec.funcs.size() - 1;

sha1_starts(&ctx);
sha1_update(&ctx, reinterpret_cast<const u8*>(func.data.data()), func.data.size() * 4);
sha1_finish(&ctx, output);
std::memcpy(&hash_start, output, sizeof(hash_start));
}
sec_addr = sec.vaddr;
func_addr = ::at32(sec.funcs, func_i - add_size);
inst_data = &sec.insts;
break;
}

// Check hash against allowed bounds
const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound;
add_size += sec.funcs.size();
}

if ((!inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound || hash_start > g_cfg.core.spu_llvm_upper_bound)) ||
(inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound && hash_start > g_cfg.core.spu_llvm_upper_bound)))
if (sec_addr == umax)
{
spu_log.error("[Debug] Skipped function %s", fmt::base57(hash_start));
result++;
continue;
// End of compilation for thread
break;
}

// Initialize LS with function data only
for (u32 i = 0, pos = start; i < size0; i++, pos += 4)
if (is_new_section)
{
ls[pos / 4] = std::bit_cast<be_t<u32>>(func.data[i]);
// Initialize LS with the entire section data
for (u32 i = 0, pos = sec_addr; i < inst_data->size(); i++, pos += 4)
{
ls[pos / 4] = std::bit_cast<be_t<u32>>((*inst_data)[i]);
}
}

// Call analyser
spu_program func2 = compiler->analyse(ls.data(), func.entry_point);
spu_program func2 = compiler->analyse(ls.data(), func_addr);

if (func2 != func)
while (func2.data.empty())
{
spu_log.error("[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);
const u32 last_inst = std::bit_cast<be_t<u32>>(func2.data.back());
const u32 prog_size = func2.data.size();

if (!compiler->compile(std::move(func2)))
{
// Likely, out of JIT memory. Signal to prevent further building.
fail_flag |= 1;
break;
}

result++;

if (g_cfg.core.spu_block_size >= spu_block_size_type::mega)
{
// Should already take care of the entire function
break;
}

if (auto type = g_spu_itype.decode(last_inst);
type == spu_itype::BRSL || type == spu_itype::BRASL || type == spu_itype::BISL)
{
const u32 start_new = func_addr + prog_size * 4;

if (start_new < SPU_LS_SIZE && ls[start_new / 4] && g_spu_itype.decode(ls[start_new / 4]) != spu_itype::UNK)
{
spu_log.notice("Precompiling fallthrough to 0x%05x", start_new);
func2 = compiler->analyse(ls.data(), start_new);
func_addr = start_new;
}
}
}
else if (!compiler->compile(std::move(func2)))

if (is_end_section)
{
// Likely, out of JIT memory. Signal to prevent further building.
fail_flag |= 1;
// Clear fake LS
std::memset(ls.data() + sec_addr / 4, 0, 4 * inst_data->size());
}

// Clear fake LS
std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));

result++;
}

Expand Down Expand Up @@ -2045,10 +2150,16 @@ std::vector<u32> spu_thread::recover_functions(const void* ls_start, u32 /*entry

const u32 func = op_branch_targets(addr, op)[0];

if (std::count(addrs.begin(), addrs.end(), addrs)
if (std::count(addrs.begin(), addrs.end(), func))
{
continue;
}

addrs.push_back(func);
}

std::sort(addrs.begin(), addrs.end());

return addrs;
}

Expand Down Expand Up @@ -2795,6 +2906,8 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
}
}

spu_program result2 = result;

while (lsa > 0 || limit < 0x40000)
{
const u32 initial_size = ::size32(result.data);
Expand Down Expand Up @@ -3241,7 +3354,13 @@ spu_program spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
{
workload.clear();
workload.push_back(entry_point);
ensure(m_bbs.count(entry_point));
if (!m_bbs.count(entry_point))
{
std::string func_bad;
dump(result2, func_bad);
spu_log.error("%s", func_bad);
return {};
}

std::basic_string<u32> new_entries;

Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/system_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ struct cfg_root : cfg::node
cfg::string llvm_cpu{ this, "Use LLVM CPU" };
cfg::_int<0, 1024> llvm_threads{ this, "Max LLVM Compile Threads", 0 };
cfg::_bool ppu_llvm_greedy_mode{ this, "PPU LLVM Greedy Mode", false, false };
cfg::_bool ppu_llvm_precompilation{ this, "PPU LLVM Precompilation", true };
cfg::_bool llvm_precompilation{ this, "LLVM Precompilation", true };
cfg::_enum<thread_scheduler_mode> thread_scheduler{this, "Thread Scheduler Mode", thread_scheduler_mode::os};
cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false };
cfg::_enum<spu_decoder_type> spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm };
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/rpcs3qt/emu_settings_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ enum class emu_settings_type
SPUDebug,
MFCDebug,
MaxLLVMThreads,
PPULLVMPrecompilation,
LLVMPrecompilation,
EnableTSX,
AccurateGETLLAR,
AccurateSpuDMA,
Expand Down Expand Up @@ -204,7 +204,7 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::SPUDebug, { "Core", "SPU Debug"}},
{ emu_settings_type::MFCDebug, { "Core", "MFC Debug"}},
{ emu_settings_type::MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}},
{ emu_settings_type::PPULLVMPrecompilation, { "Core", "PPU LLVM Precompilation"}},
{ emu_settings_type::LLVMPrecompilation, { "Core", "LLVM Precompilation"}},
{ emu_settings_type::EnableTSX, { "Core", "Enable TSX"}},
{ emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}},
{ emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}},
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/rpcs3qt/settings_dialog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1452,8 +1452,8 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->fixupPPUVNAN, emu_settings_type::FixupPPUVNAN);
SubscribeTooltip(ui->fixupPPUVNAN, tooltips.settings.fixup_ppuvnan);

m_emu_settings->EnhanceCheckBox(ui->ppuPrecompilation, emu_settings_type::PPULLVMPrecompilation);
SubscribeTooltip(ui->ppuPrecompilation, tooltips.settings.ppu_precompilation);
m_emu_settings->EnhanceCheckBox(ui->llvmPrecompilation, emu_settings_type::LLVMPrecompilation);
SubscribeTooltip(ui->llvmPrecompilation, tooltips.settings.llvm_precompilation);

m_emu_settings->EnhanceCheckBox(ui->suspendSavestates, emu_settings_type::SuspendEmulationSavestateMode);
SubscribeTooltip(ui->suspendSavestates, tooltips.settings.suspend_savestates);
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/rpcs3qt/settings_dialog.ui
Original file line number Diff line number Diff line change
Expand Up @@ -2394,9 +2394,9 @@
</widget>
</item>
<item>
<widget class="QCheckBox" name="ppuPrecompilation">
<widget class="QCheckBox" name="llvmPrecompilation">
<property name="text">
<string>PPU LLVM Precompilation</string>
<string>PPU/SPU LLVM Precompilation</string>
</property>
</widget>
</item>
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/rpcs3qt/tooltips.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class Tooltips : public QObject
const QString ppu__static = tr("Interpreter (slow). Try this if PPU Recompiler (LLVM) doesn't work.");
const QString ppu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if PPU Recompiler (LLVM) doesn't work.");
const QString ppu_llvm = tr("Recompiles and caches the game's PPU code using the LLVM Recompiler once before running it for the first time.\nThis is by far the fastest option and should always be used.\nShould you face compatibility issues, fall back to one of the Interpreters and retry.\nIf unsure, use this option.");
const QString ppu_precompilation = tr("Searches the game's directory and precompiles extra PPU modules during boot.\nIf disabled, these modules will only be compiled when needed. Depending on the game, this might interrupt the gameplay unexpectedly and possibly frequently.\nOnly disable this if you want to get ingame more quickly.");
const QString llvm_precompilation = tr("Searches the game's directory and precompiles extra PPU and SPU modules during boot.\nIf disabled, these modules will only be compiled when needed. Depending on the game, this might interrupt the gameplay unexpectedly and possibly frequently.\nOnly disable this if you want to get ingame more quickly.");
const QString spu__static = tr("Interpreter (slow). Try this if SPU Recompiler (LLVM) doesn't work.");
const QString spu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if SPU Recompiler (LLVM) doesn't work.");
const QString spu_asmjit = tr("Recompiles the game's SPU code using the ASMJIT Recompiler.\nThis is the fast option with very good compatibility.\nIf unsure, use this option.");
Expand Down

0 comments on commit c3c1e91

Please sign in to comment.