diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index ff8f3c1c83f2..d71c003f32f8 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1107,6 +1107,13 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& if (prog.p_type == 0x1u /* LOAD */ && prog.p_filesz > 0u) { + if (prog.p_vaddr && !mod.cache.empty()) + { + extern void emplace_spu_data_section(u32 vaddr, const void* ls_data_vaddr, u32 size); + + emplace_spu_data_section(prog.p_vaddr, (elf_header + prog.p_offset), prog.p_filesz); + } + sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz); } @@ -1119,7 +1126,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu_segment& if (!name.empty()) { - fmt::append(dump, "\n\tSPUNAME: '%s'", name); + fmt::append(dump, "\n\tSPUNAME: '%s' (image addr: 0x%x)", name, seg.addr + i); } } } diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 775695f0a2cc..a64f59ca8899 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -4030,7 +4030,7 @@ extern void ppu_initialize() const std::string mount_point = vfs::get("/dev_flash/"); - bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.ppu_llvm_precompilation; + bool dev_flash_located = !Emu.GetCat().ends_with('P') && Emu.IsPathInsideDir(Emu.GetBoot(), mount_point) && g_cfg.core.llvm_precompilation; if (compile_fw || dev_flash_located) { @@ -4050,7 +4050,7 @@ extern void ppu_initialize() } // Avoid compilation if main's cache exists or it is a standalone SELF with no PARAM.SFO - if (compile_main && g_cfg.core.ppu_llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess()) + if (compile_main && g_cfg.core.llvm_precompilation && !Emu.GetTitleID().empty() && !Emu.IsChildProcess()) { // Try to add all related directories const std::set dirs = Emu.GetGameDirs(); diff --git 
a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index acf8f41e388e..afde29f8174d 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -516,6 +516,65 @@ spu_cache::~spu_cache() { } +struct spu_section_data +{ + struct data_t + { + u32 vaddr; + std::vector insts; + std::vector funcs; + }; + + shared_mutex mtx; + std::vector data; +}; + +extern void emplace_spu_data_section(u32 vaddr, const void* ls_data_vaddr, u32 size) +{ + if (vaddr % 4) + { + return; + } + + size &= -4; + + if (!size || vaddr >= SPU_LS_SIZE || size > SPU_LS_SIZE - vaddr) // Overflow-safe bounds check: vaddr + size may wrap around in u32 + { + return; + } + + if (!g_cfg.core.llvm_precompilation) + { + return; + } + + g_fxo->need(); + + std::vector data(size / 4); + std::memcpy(data.data(), ls_data_vaddr, size); + + spu_section_data::data_t obj{vaddr, std::move(data)}; + + std::vector ls_data(SPU_LS_SIZE); + std::memcpy(ls_data.data() + vaddr, ls_data_vaddr, size); + + obj.funcs = spu_thread::recover_functions(ls_data.data(), umax); + + if (obj.funcs.empty()) + { + // Nothing to add + return; + } + + for (u32 addr : obj.funcs) + { + spu_log.success("Found SPU function at: 0x%08x", addr); + } + + std::lock_guard lock(g_fxo->get().mtx); + g_fxo->get().data.emplace_back(std::move(obj)); +} + std::deque spu_cache::get() { std::deque result; @@ -618,7 +677,8 @@ void spu_cache::initialize() atomic_t fnext{}; atomic_t fail_flag{0}; - auto entry_list = cache.get(); + auto data_list = std::move(g_fxo->get().data); + atomic_t enext{}; if (g_cfg.core.spu_decoder == spu_decoder_type::dynamic || g_cfg.core.spu_decoder == spu_decoder_type::llvm) @@ -660,7 +720,18 @@ void spu_cache::initialize() thread_ctrl::wait_on(g_progr_ptotal, v); } - g_progr_ptotal += ::size32(func_list); + u32 add_size = ::size32(func_list); + + if (func_list.empty()) + { + for (auto& sec : data_list) + { + add_size += sec.funcs.size(); + } + } + + g_progr_ptotal += add_size; + progr.emplace("Building SPU cache..."); worker_count = rpcs3::utils::get_max_threads(); @@ -747,6
+818,7 @@ void spu_cache::initialize() { // Likely, out of JIT memory. Signal to prevent further building. fail_flag |= 1; + continue; } // Clear fake LS @@ -755,64 +827,95 @@ void spu_cache::initialize() result++; } - for (usz func_i = enext++; func_i < func_list.size(); func_i = enext++, g_progr_pdone++) + if (!func_list.empty() || !g_cfg.core.llvm_precompilation) { - const spu_program& func = std::as_const(func_list)[func_i]; + // Cache has already been initiated or the user does not want to precompile SPU programs + break; + } - if (Emu.IsStopped() || fail_flag) - { - continue; - } + for (usz func_i = enext++;; func_i = enext++, g_progr_pdone++) + { + bool is_new_section = false; + bool is_end_section = false; + u32 add_size = 0; - // Get data start - const u32 start = func.lower_bound; - const u32 size0 = ::size32(func.data); + u32 func_addr = 0; + u32 sec_addr = umax; + std::vector* inst_data{}; - be_t hash_start; + for (auto& sec : data_list) { - sha1_context ctx; - u8 output[20]; + if (func_i < add_size + sec.funcs.size()) + { + is_new_section = func_i == add_size; + is_end_section = func_i == add_size + sec.funcs.size() - 1; - sha1_starts(&ctx); - sha1_update(&ctx, reinterpret_cast(func.data.data()), func.data.size() * 4); - sha1_finish(&ctx, output); - std::memcpy(&hash_start, output, sizeof(hash_start)); - } + sec_addr = sec.vaddr; + func_addr = ::at32(sec.funcs, func_i - add_size); + inst_data = &sec.insts; + break; + } - // Check hash against allowed bounds - const bool inverse_bounds = g_cfg.core.spu_llvm_lower_bound > g_cfg.core.spu_llvm_upper_bound; + add_size += sec.funcs.size(); + } - if ((!inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound || hash_start > g_cfg.core.spu_llvm_upper_bound)) || - (inverse_bounds && (hash_start < g_cfg.core.spu_llvm_lower_bound && hash_start > g_cfg.core.spu_llvm_upper_bound))) + if (sec_addr == umax) { - spu_log.error("[Debug] Skipped function %s", fmt::base57(hash_start)); - result++; - continue; + // 
End of compilation for thread + break; } - // Initialize LS with function data only - for (u32 i = 0, pos = start; i < size0; i++, pos += 4) + if (is_new_section) { - ls[pos / 4] = std::bit_cast>(func.data[i]); + // Initialize LS with the entire section data + for (u32 i = 0, pos = sec_addr; i < inst_data->size(); i++, pos += 4) + { + ls[pos / 4] = std::bit_cast>((*inst_data)[i]); + } } // Call analyser - spu_program func2 = compiler->analyse(ls.data(), func.entry_point); + spu_program func2 = compiler->analyse(ls.data(), func_addr); - if (func2 != func) + while (!func2.data.empty()) { - spu_log.error("[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0); + const u32 last_inst = std::bit_cast>(func2.data.back()); + const u32 prog_size = func2.data.size(); + + if (!compiler->compile(std::move(func2))) + { + // Likely, out of JIT memory. Signal to prevent further building. + fail_flag |= 1; + break; + } + + result++; + + if (g_cfg.core.spu_block_size >= spu_block_size_type::mega) + { + // Should already take care of the entire function + break; + } + + if (auto type = g_spu_itype.decode(last_inst); + type == spu_itype::BRSL || type == spu_itype::BRASL || type == spu_itype::BISL) + { + const u32 start_new = func_addr + prog_size * 4; + + if (start_new < SPU_LS_SIZE && ls[start_new / 4] && g_spu_itype.decode(ls[start_new / 4]) != spu_itype::UNK) + { + spu_log.notice("Precompiling fallthrough to 0x%05x", start_new); + func2 = compiler->analyse(ls.data(), start_new); + func_addr = start_new; + } + } } - else if (!compiler->compile(std::move(func2))) + + if (is_end_section) { - // Likely, out of JIT memory. Signal to prevent further building.
- fail_flag |= 1; + // Clear fake LS + std::memset(ls.data() + sec_addr / 4, 0, 4 * inst_data->size()); } - - // Clear fake LS - std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1)); - - result++; } return result; @@ -2045,10 +2148,16 @@ std::vector spu_thread::recover_functions(const void* ls_start, u32 /*entry const u32 func = op_branch_targets(addr, op)[0]; - if (std::count(addrs.begin(), addrs.end(), addrs) + if (std::count(addrs.begin(), addrs.end(), func)) + { + continue; + } + addrs.push_back(func); } + std::sort(addrs.begin(), addrs.end()); + return addrs; } @@ -2795,6 +2904,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point) } } + spu_program result2 = result; + while (lsa > 0 || limit < 0x40000) { const u32 initial_size = ::size32(result.data); @@ -3241,7 +3352,13 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point) { workload.clear(); workload.push_back(entry_point); - ensure(m_bbs.count(entry_point)); + if (!m_bbs.count(entry_point)) + { + std::string func_bad; + dump(result2, func_bad); + spu_log.error("%s", func_bad); + return {}; + } std::basic_string new_entries; diff --git a/rpcs3/Emu/system_config.h b/rpcs3/Emu/system_config.h index 3355c4d4528c..6d6ef869e7f1 100644 --- a/rpcs3/Emu/system_config.h +++ b/rpcs3/Emu/system_config.h @@ -28,7 +28,7 @@ struct cfg_root : cfg::node cfg::string llvm_cpu{ this, "Use LLVM CPU" }; cfg::_int<0, 1024> llvm_threads{ this, "Max LLVM Compile Threads", 0 }; cfg::_bool ppu_llvm_greedy_mode{ this, "PPU LLVM Greedy Mode", false, false }; - cfg::_bool ppu_llvm_precompilation{ this, "PPU LLVM Precompilation", true }; + cfg::_bool llvm_precompilation{ this, "LLVM Precompilation", true }; cfg::_enum thread_scheduler{this, "Thread Scheduler Mode", thread_scheduler_mode::os}; cfg::_bool set_daz_and_ftz{ this, "Set DAZ and FTZ", false }; cfg::_enum spu_decoder{ this, "SPU Decoder", spu_decoder_type::llvm }; diff --git a/rpcs3/rpcs3qt/emu_settings_type.h 
b/rpcs3/rpcs3qt/emu_settings_type.h index 62d67e43fd45..a3371d57d957 100644 --- a/rpcs3/rpcs3qt/emu_settings_type.h +++ b/rpcs3/rpcs3qt/emu_settings_type.h @@ -19,7 +19,7 @@ enum class emu_settings_type SPUDebug, MFCDebug, MaxLLVMThreads, - PPULLVMPrecompilation, + LLVMPrecompilation, EnableTSX, AccurateGETLLAR, AccurateSpuDMA, @@ -204,7 +204,7 @@ inline static const QMap settings_location = { emu_settings_type::SPUDebug, { "Core", "SPU Debug"}}, { emu_settings_type::MFCDebug, { "Core", "MFC Debug"}}, { emu_settings_type::MaxLLVMThreads, { "Core", "Max LLVM Compile Threads"}}, - { emu_settings_type::PPULLVMPrecompilation, { "Core", "PPU LLVM Precompilation"}}, + { emu_settings_type::LLVMPrecompilation, { "Core", "LLVM Precompilation"}}, { emu_settings_type::EnableTSX, { "Core", "Enable TSX"}}, { emu_settings_type::AccurateGETLLAR, { "Core", "Accurate GETLLAR"}}, { emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index f15369661781..3628fb33f01a 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -1452,8 +1452,8 @@ settings_dialog::settings_dialog(std::shared_ptr gui_settings, std m_emu_settings->EnhanceCheckBox(ui->fixupPPUVNAN, emu_settings_type::FixupPPUVNAN); SubscribeTooltip(ui->fixupPPUVNAN, tooltips.settings.fixup_ppuvnan); - m_emu_settings->EnhanceCheckBox(ui->ppuPrecompilation, emu_settings_type::PPULLVMPrecompilation); - SubscribeTooltip(ui->ppuPrecompilation, tooltips.settings.ppu_precompilation); + m_emu_settings->EnhanceCheckBox(ui->llvmPrecompilation, emu_settings_type::LLVMPrecompilation); + SubscribeTooltip(ui->llvmPrecompilation, tooltips.settings.llvm_precompilation); m_emu_settings->EnhanceCheckBox(ui->suspendSavestates, emu_settings_type::SuspendEmulationSavestateMode); SubscribeTooltip(ui->suspendSavestates, tooltips.settings.suspend_savestates); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui 
b/rpcs3/rpcs3qt/settings_dialog.ui index dd9c67dd7a9b..bafa45567617 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -2394,9 +2394,9 @@ - + - PPU LLVM Precompilation + PPU/SPU LLVM Precompilation diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index 1982a108e0d5..6e73f2f4b2ce 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -75,7 +75,7 @@ class Tooltips : public QObject const QString ppu__static = tr("Interpreter (slow). Try this if PPU Recompiler (LLVM) doesn't work."); const QString ppu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if PPU Recompiler (LLVM) doesn't work."); const QString ppu_llvm = tr("Recompiles and caches the game's PPU code using the LLVM Recompiler once before running it for the first time.\nThis is by far the fastest option and should always be used.\nShould you face compatibility issues, fall back to one of the Interpreters and retry.\nIf unsure, use this option."); - const QString ppu_precompilation = tr("Searches the game's directory and precompiles extra PPU modules during boot.\nIf disabled, these modules will only be compiled when needed. Depending on the game, this might interrupt the gameplay unexpectedly and possibly frequently.\nOnly disable this if you want to get ingame more quickly."); + const QString llvm_precompilation = tr("Searches the game's directory and precompiles extra PPU and SPU modules during boot.\nIf disabled, these modules will only be compiled when needed. Depending on the game, this might interrupt the gameplay unexpectedly and possibly frequently.\nOnly disable this if you want to get ingame more quickly."); const QString spu__static = tr("Interpreter (slow). Try this if SPU Recompiler (LLVM) doesn't work."); const QString spu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. 
Try this if SPU Recompiler (LLVM) doesn't work."); const QString spu_asmjit = tr("Recompiles the game's SPU code using the ASMJIT Recompiler.\nThis is the fast option with very good compatibility.\nIf unsure, use this option.");