From c58500af1cd57b4cb29a572f93ea0617a642cee3 Mon Sep 17 00:00:00 2001 From: Gita Koblents Date: Sun, 31 Mar 2024 17:03:10 -0400 Subject: [PATCH] Disclaim Cold Code Cache - use the same heuristics for code cache disclaim as for data cache - disclaim starting from the cold code - move stack overflow outline instructions into the warm area to increase disclaim efficiency Depends on: https://github.com/eclipse/omr/pull/7300 Depends on: https://github.com/eclipse/omr/pull/7324 --- runtime/compiler/control/HookedByTheJit.cpp | 41 +++++++++ runtime/compiler/control/J9Options.cpp | 5 +- .../compiler/control/OptionsPostRestore.cpp | 3 +- runtime/compiler/control/rossa.cpp | 2 +- runtime/compiler/runtime/CRRuntime.cpp | 5 +- runtime/compiler/runtime/J9CodeCache.cpp | 90 +++++++++++++++++++ runtime/compiler/runtime/J9CodeCache.hpp | 2 + .../compiler/runtime/J9CodeCacheManager.cpp | 47 ++++++++++ .../compiler/runtime/J9CodeCacheManager.hpp | 7 +- .../compiler/x/codegen/X86PrivateLinkage.cpp | 42 ++++++++- 10 files changed, 236 insertions(+), 8 deletions(-) diff --git a/runtime/compiler/control/HookedByTheJit.cpp b/runtime/compiler/control/HookedByTheJit.cpp index b5c5a0a8c9b..ae2f65cb579 100644 --- a/runtime/compiler/control/HookedByTheJit.cpp +++ b/runtime/compiler/control/HookedByTheJit.cpp @@ -4592,10 +4592,22 @@ void disclaimDataCaches(uint64_t crtElapsedTime) (uint32_t)crtElapsedTime, numDisclaimed, rssBefore, rssAfter, rssBefore - rssAfter); } +void disclaimCodeCaches(uint64_t crtElapsedTime) + { + size_t rssBefore = getRSS_Kb(); + int numDisclaimed = TR::CodeCacheManager::instance()->disclaimAllCodeCaches(); + size_t rssAfter = getRSS_Kb(); + if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + TR_VerboseLog::writeLineLocked(TR_Vlog_PERF, "t=%u JIT disclaimed %d Code Caches RSS before=%zu KB, RSS after=%zu KB, delta=%zu KB", + (uint32_t)crtElapsedTime, numDisclaimed, rssBefore, rssAfter, rssBefore - rssAfter); + } + void 
memoryDisclaimLogic(TR::CompilationInfo *compInfo, uint64_t crtElapsedTime, uint8_t jitState) { static uint64_t lastDataCacheDisclaimTime = 0; static int32_t lastNumAllocatedDataCaches = 0; + static uint64_t lastCodeCacheDisclaimTime = 0; + static int32_t lastNumAllocatedCodeCaches = 0; J9JITConfig *jitConfig = compInfo->getJITConfig(); @@ -4624,6 +4636,24 @@ void memoryDisclaimLogic(TR::CompilationInfo *compInfo, uint64_t crtElapsedTime, } } } + + // Use logic similar to Data caches above for now + if (TR::CodeCacheManager::instance()->isDisclaimEnabled()) + { + // Ensure we don't do it too often + if (crtElapsedTime > lastCodeCacheDisclaimTime + TR::Options::_minTimeBetweenMemoryDisclaims) + { + // Disclaim if at least one code cache has been allocated since the last disclaim + // or if there was a large time interval since the last disclaim + if (TR::CodeCacheManager::instance()->getCurrentNumberOfCodeCaches() > lastNumAllocatedCodeCaches || + crtElapsedTime > lastCodeCacheDisclaimTime + 12 * TR::Options::_minTimeBetweenMemoryDisclaims) + { + disclaimCodeCaches(crtElapsedTime); + lastCodeCacheDisclaimTime = crtElapsedTime; // Update the time when disclaim was last performed + lastNumAllocatedCodeCaches = TR::CodeCacheManager::instance()->getCurrentNumberOfCodeCaches(); + } + } + } } @@ -5301,6 +5331,17 @@ static void jitStateLogic(J9JITConfig * jitConfig, TR::CompilationInfo * compInf } } +#ifdef DEBUG_CODE_DISCLAIM + static int printRSS = 0; + printRSS++; + if (printRSS == 4 && // ~every 2s + TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + { + TR_VerboseLog::writeLineLocked(TR_Vlog_PERF, "Current RSS %zuKB", getRSS_Kb()); + printRSS = 0; + } +#endif + if (lateDisclaimNeeded) { CpuUtilization *cpuUtil = compInfo->getCpuUtil(); diff --git a/runtime/compiler/control/J9Options.cpp b/runtime/compiler/control/J9Options.cpp index b229af502c9..f726bbeb02d 100644 --- a/runtime/compiler/control/J9Options.cpp +++ 
b/runtime/compiler/control/J9Options.cpp @@ -2733,6 +2733,7 @@ J9::Options::fePreProcess(void * base) { self()->setOption(TR_DisableDataCacheDisclaiming); self()->setOption(TR_DisableIProfilerDataDisclaiming); + self()->setOption(TR_EnableCodeCacheDisclaiming, false); } return true; @@ -2897,7 +2898,8 @@ J9::Options::fePostProcessJIT(void * base) } if (!self()->getOption(TR_DisableDataCacheDisclaiming) || - !self()->getOption(TR_DisableIProfilerDataDisclaiming)) + !self()->getOption(TR_DisableIProfilerDataDisclaiming) || + self()->getOption(TR_EnableCodeCacheDisclaiming)) { // Check requirements for memory disclaiming (Linux kernel and default page size) TR::Options::disableMemoryDisclaimIfNeeded(jitConfig); @@ -2954,6 +2956,7 @@ J9::Options::disableMemoryDisclaimIfNeeded(J9JITConfig *jitConfig) { TR::Options::getCmdLineOptions()->setOption(TR_DisableDataCacheDisclaiming); TR::Options::getCmdLineOptions()->setOption(TR_DisableIProfilerDataDisclaiming); + TR::Options::getCmdLineOptions()->setOption(TR_EnableCodeCacheDisclaiming, false); } return shouldDisableMemoryDisclaim; } diff --git a/runtime/compiler/control/OptionsPostRestore.cpp b/runtime/compiler/control/OptionsPostRestore.cpp index 3cc098e4472..ca557099209 100644 --- a/runtime/compiler/control/OptionsPostRestore.cpp +++ b/runtime/compiler/control/OptionsPostRestore.cpp @@ -805,7 +805,8 @@ J9::OptionsPostRestore::postProcessInternalCompilerOptions() } if (!TR::Options::getCmdLineOptions()->getOption(TR_DisableDataCacheDisclaiming) || - !TR::Options::getCmdLineOptions()->getOption(TR_DisableIProfilerDataDisclaiming)) + !TR::Options::getCmdLineOptions()->getOption(TR_DisableIProfilerDataDisclaiming) || + TR::Options::getCmdLineOptions()->getOption(TR_EnableCodeCacheDisclaiming)) { TR::Options::disableMemoryDisclaimIfNeeded(_jitConfig); } diff --git a/runtime/compiler/control/rossa.cpp b/runtime/compiler/control/rossa.cpp index 1b0aa6a1ff8..a0923d095f5 100644 --- a/runtime/compiler/control/rossa.cpp +++ 
b/runtime/compiler/control/rossa.cpp @@ -1191,7 +1191,7 @@ onLoadInternal( } else { - jitConfig->codeCacheKB = 2048; + jitConfig->codeCacheKB = TR::Options::getCmdLineOptions()->getOption(TR_EnableCodeCacheDisclaiming) ? 2048 * 2 : 2048; jitConfig->dataCacheKB = 2048; } #else diff --git a/runtime/compiler/runtime/CRRuntime.cpp b/runtime/compiler/runtime/CRRuntime.cpp index 60226dc42da..b1094138ed0 100644 --- a/runtime/compiler/runtime/CRRuntime.cpp +++ b/runtime/compiler/runtime/CRRuntime.cpp @@ -652,9 +652,10 @@ TR::CRRuntime::prepareForCheckpoint() #endif // Make sure the limit for the ghost files is at least as big as the data cache size - if (!TR::Options::getCmdLineOptions()->getOption(TR_DisableDataCacheDisclaiming)) + if (!TR::Options::getCmdLineOptions()->getOption(TR_DisableDataCacheDisclaiming) || + TR::Options::getCmdLineOptions()->getOption(TR_EnableCodeCacheDisclaiming)) { - U_32 ghostFileLimit = vm->jitConfig->dataCacheKB * 1024; // convert to bytes + U_32 ghostFileLimit = std::max(vm->jitConfig->dataCacheKB, vm->jitConfig->codeCacheTotalKB) * 1024; // convert to bytes vm->internalVMFunctions->setRequiredGhostFileLimit(vmThread, ghostFileLimit); } diff --git a/runtime/compiler/runtime/J9CodeCache.cpp b/runtime/compiler/runtime/J9CodeCache.cpp index a31e47f3816..987b4b289ad 100644 --- a/runtime/compiler/runtime/J9CodeCache.cpp +++ b/runtime/compiler/runtime/J9CodeCache.cpp @@ -55,10 +55,19 @@ #include "env/VerboseLog.hpp" #include "omrformatconsts.h" +// for madvise +#ifdef LINUX +#include <sys/mman.h> +#ifndef MADV_PAGEOUT +#define MADV_PAGEOUT 21 +#endif +#endif + OMR::CodeCacheMethodHeader *getCodeCacheMethodHeader(char *p, int searchLimit, J9JITExceptionTable * metaData); #define addFreeBlock2(start, end) addFreeBlock2WithCallSite((start), (end), __FILE__, __LINE__) +#define DISCLAIM_PAGE_SIZE 4*1024 TR::CodeCache * J9::CodeCache::self() @@ -134,6 +143,40 @@ J9::CodeCache::initialize(TR::CodeCacheManager *manager, if (!self()->OMR::CodeCache::initialize(manager, 
codeCacheSegment, allocatedCodeCacheSizeInBytes)) return false; + +#ifdef LINUX + if (manager->isDisclaimEnabled()) + { + uint8_t *middle = _warmCodeAlloc + (_coldCodeAlloc - _warmCodeAlloc) / 2; + size_t round = DISCLAIM_PAGE_SIZE - 1; + + middle = (uint8_t *)(((size_t)(middle + round)) & ~round); + size_t coldCacheSize = _coldCodeAlloc - middle; + + if (madvise(middle, coldCacheSize, MADV_NOHUGEPAGE) != 0) + { + if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + TR_VerboseLog::writeLine(TR_Vlog_INFO, "Failed to set MADV_NOHUGEPAGE for code cache"); + } + else if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + { + TR_VerboseLog::writeLine(TR_Vlog_INFO, "In code cache %p small pages start from %p\n", this, middle); + } + + // If the memory segment is backed by a file, disable read-ahead + // so that touching one byte brings a single page in + if (codeCacheSegment->j9segment()->vmemIdentifier.allocator == OMRPORT_VMEM_RESERVE_USED_MMAP_SHM) + { + if (madvise(middle, coldCacheSize, MADV_RANDOM) != 0) + { + if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + TR_VerboseLog::writeLine(TR_Vlog_INFO, "Failed to set MADV_RANDOM for cold code cache"); + } + } + } +#endif // ifdef LINUX + + self()->setInitialAllocationPointers(); _manager->reportCodeLoadEvents(); @@ -763,3 +806,50 @@ extern "C" } } + + +int32_t +J9::CodeCache::disclaim(TR::CodeCacheManager *manager, bool canDisclaimOnSwap) + { + int32_t disclaimDone = 0; + +#ifdef LINUX + bool trace = TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance); + uint8_t *disclaim_start = _coldCodeAlloc; + size_t pageSize = DISCLAIM_PAGE_SIZE; + size_t round = pageSize - 1; + disclaim_start = (uint8_t *)(((size_t)(disclaim_start + round)) & ~round); + size_t disclaim_size = (disclaim_start < _coldCodeAllocBase) ? pageSize * ((_coldCodeAllocBase - disclaim_start)/pageSize) : 0; // rounding up may move the start past coldBase; use 0 rather than letting the unsigned math wrap + + if (trace) + { + size_t warm_size = _warmCodeAlloc - _segment->segmentBase() + 
sizeof(this); // NOTE(review): sizeof(this) is the size of a pointer; confirm whether sizeof(*this) was intended + size_t cold_size = _coldCodeAllocBase - _coldCodeAlloc; + + TR_VerboseLog::writeLine(TR_Vlog_PERF, "Disclaiming cold code cache %p : coldStart=%p coldBase=%p warm_size=%zuB cold_size=%zuB cold_size/warm_size=%5.2f%%\n", + this, _coldCodeAlloc, _coldCodeAllocBase, + warm_size, cold_size, cold_size * 100.0/warm_size); + } + + int32_t ret = (disclaim_size == 0 || madvise((void *)disclaim_start, disclaim_size, MADV_PAGEOUT) == 0) ? 0 : errno; // madvise returns -1 and sets errno on failure; capture errno before logging can overwrite it + + if (ret != 0) + { + if (trace) + TR_VerboseLog::writeLine(TR_Vlog_PERF, "WARNING: Failed to use madvise to disclaim memory for code cache"); + + if (ret == EINVAL) + { + manager->setDisclaimEnabled(false); // Don't try to disclaim again, since support seems to be missing + if (trace) + TR_VerboseLog::writeLine(TR_Vlog_PERF, "WARNING: Disabling code cache disclaiming from now on"); + } + } + else + { + disclaimDone = (disclaim_size > 0) ? 1 : 0; // count this cache only if pages were actually handed to madvise + } +#endif // ifdef LINUX + + return disclaimDone; + } diff --git a/runtime/compiler/runtime/J9CodeCache.hpp b/runtime/compiler/runtime/J9CodeCache.hpp index b3ca1c1c499..570feb9163a 100644 --- a/runtime/compiler/runtime/J9CodeCache.hpp +++ b/runtime/compiler/runtime/J9CodeCache.hpp @@ -103,6 +103,8 @@ class OMR_EXTENSIBLE CodeCache : public OMR::CodeCacheConnector */ void resetCodeCache(); + int32_t disclaim(TR::CodeCacheManager *manager, bool canDisclaimOnSwap); + private: /** * @brief Restore trampoline pointers to their initial positions diff --git a/runtime/compiler/runtime/J9CodeCacheManager.cpp b/runtime/compiler/runtime/J9CodeCacheManager.cpp index 4426485ec67..f883660ca42 100644 --- a/runtime/compiler/runtime/J9CodeCacheManager.cpp +++ b/runtime/compiler/runtime/J9CodeCacheManager.cpp @@ -385,6 +385,21 @@ J9::CodeCacheManager::allocateCodeCacheSegment(size_t segmentSize, } #endif +#ifdef LINUX + if (_disclaimEnabled) + { + segmentType |= MEMORY_TYPE_VIRTUAL; // Make sure mmap is used for allocation + + // If swap is enabled, we can allocate memory with mmap(MAP_ANONYMOUS|MAP_PRIVATE) and disclaim to swap + // If swap is not 
enabled we can disclaim to a backing file + TR::CompilationInfo * compInfo = TR::CompilationInfo::get(_jitConfig); + if (!TR::Options::getCmdLineOptions()->getOption(TR_DisclaimMemoryOnSwap) || compInfo->isSwapMemoryDisabled()) + { + segmentType |= MEMORY_TYPE_DISCLAIMABLE_TO_FILE; + } + } +#endif + mcc_printf("TR::CodeCache::allocate : requesting %d bytes\n", codeCacheSizeToAllocate); mcc_printf("TR::CodeCache::allocate : javaVM = %p\n", javaVM); mcc_printf("TR::CodeCache::allocate : codeCacheList = %p\n", jitConfig->codeCacheList); @@ -488,11 +503,20 @@ J9::CodeCacheManager::allocateCodeCacheSegment(size_t segmentSize, vmemParams.startAddress, vmemParams.endAddress); } + + if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + TR_VerboseLog::writeLine(TR_Vlog_INFO, "Allocated new code cache segment %p starting at address %p", + codeCacheSegment, + codeCacheSegment->heapBase); } else { // TODO: we should generate a trace point mcc_printf("TR::CodeCache::allocate : codeCacheSegment is NULL, %p\n",codeCacheSegment); + + if (TR::Options::getCmdLineOptions()->getVerboseOption(TR_VerbosePerformance)) + TR_VerboseLog::writeLine(TR_Vlog_INFO, "Failed to allocate new code cache segment of %d Kb", _jitConfig->codeCacheKB); + return 0; } @@ -779,3 +803,26 @@ J9::CodeCacheManager::printOccupancyStats() codeCache->printOccupancyStats(); } } + + +int32_t +J9::CodeCacheManager::disclaimAllCodeCaches() + { + if (!_disclaimEnabled) + return 0; + + int32_t numDisclaimed = 0; + +#ifdef LINUX + TR::CompilationInfo *compInfo = TR::CompilationInfo::get(_jitConfig); + bool canDisclaimOnSwap = TR::Options::getCmdLineOptions()->getOption(TR_DisclaimMemoryOnSwap) && !compInfo->isSwapMemoryDisabled(); + + CacheListCriticalSection scanCacheList(self()); + for (TR::CodeCache *codeCache = self()->getFirstCodeCache(); codeCache; codeCache = codeCache->next()) + { + numDisclaimed += codeCache->disclaim(self(), canDisclaimOnSwap); + } +#endif // LINUX + + return 
numDisclaimed; + } diff --git a/runtime/compiler/runtime/J9CodeCacheManager.hpp b/runtime/compiler/runtime/J9CodeCacheManager.hpp index d60f63af853..f1bcee9038d 100644 --- a/runtime/compiler/runtime/J9CodeCacheManager.hpp +++ b/runtime/compiler/runtime/J9CodeCacheManager.hpp @@ -29,7 +29,7 @@ namespace J9 { class CodeCacheManager; } namespace J9 { typedef CodeCacheManager CodeCacheManagerConnector; } #endif - +#include "control/Options.hpp" #include "env/jittypes.h" //#include "runtime/CodeCacheMemorySegment.hpp" //#include "runtime/CodeCache.hpp" @@ -56,6 +56,7 @@ class OMR_EXTENSIBLE CodeCacheManager : public OMR::CodeCacheManagerConnector _fe(fe) { _codeCacheManager = reinterpret_cast<TR::CodeCacheManager *>(this); + _disclaimEnabled = TR::Options::getCmdLineOptions()->getOption(TR_EnableCodeCacheDisclaiming); } void *operator new(size_t s, TR::CodeCacheManager *m) { return m; } @@ -152,12 +153,16 @@ class OMR_EXTENSIBLE CodeCacheManager : public OMR::CodeCacheManagerConnector * @brief Print occupancy stats for each code cache */ void printOccupancyStats(); + bool isDisclaimEnabled() const { return _disclaimEnabled; } + void setDisclaimEnabled(bool value) { _disclaimEnabled = value; } + int32_t disclaimAllCodeCaches(); private : TR_FrontEnd *_fe; static TR::CodeCacheManager *_codeCacheManager; static J9JITConfig *_jitConfig; static J9JavaVM *_javaVM; + bool _disclaimEnabled; // If true, code cache can be disclaimed to a file or swap }; } // namespace J9 diff --git a/runtime/compiler/x/codegen/X86PrivateLinkage.cpp b/runtime/compiler/x/codegen/X86PrivateLinkage.cpp index 3639ffb492e..ab67b1157ff 100644 --- a/runtime/compiler/x/codegen/X86PrivateLinkage.cpp +++ b/runtime/compiler/x/codegen/X86PrivateLinkage.cpp @@ -763,8 +763,29 @@ void J9::X86::PrivateLinkage::createPrologue(TR::Instruction *cursor) cursor = generateLabelInstruction(cursor, TR::InstOpCode::JBE4, checkLabel, cg()); cursor = generateLabelInstruction(cursor, TR::InstOpCode::label, endLabel, cg()); - // At this point, 
cg()->getAppendInstruction() is already in the cold code section. - generateVFPRestoreInstruction(vfp, cursor->getNode(), cg()); + // Code Cache disclaim is more efficient if this code is in the warm area + bool moveToWarm = TR::Options::getCmdLineOptions()->getOption(TR_EnableCodeCacheDisclaiming) && + cg()->getLastWarmInstruction(); + + TR::Instruction* prevAppendInstruction = NULL; + TR::Instruction* followInstruction = NULL; + + if (moveToWarm) + { + // OverflowCheck OOL executes often so move it to the warm cache + if (cg()->getAppendInstruction() != cg()->getLastWarmInstruction()) + prevAppendInstruction = cg()->getAppendInstruction(); + + cg()->setAppendInstruction(cg()->getLastWarmInstruction()); + + followInstruction = cg()->getAppendInstruction()->getNext(); + } + else + { + // At this point, cg()->getAppendInstruction() is already in the cold code section. + generateVFPRestoreInstruction(vfp, cursor->getNode(), cg()); + } + generateLabelInstruction(TR::InstOpCode::label, cursor->getNode(), checkLabel, cg()); generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, cursor->getNode(), machine()->getRealRegister(TR::RealRegister::edi), allocSize, cg()); if (doAllocateFrameSpeculatively) @@ -779,6 +800,23 @@ void J9::X86::PrivateLinkage::createPrologue(TR::Instruction *cursor) generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), cursor->getNode(), espReal, allocSize, cg()); } generateLabelInstruction(TR::InstOpCode::JMP4, cursor->getNode(), endLabel, cg()); + + if (moveToWarm) + { + TR::Instruction *appendInstruction = cg()->getAppendInstruction(); + cg()->getLastWarmInstruction()->setLastWarmInstruction(false); + cg()->setLastWarmInstruction(appendInstruction); + appendInstruction->setLastWarmInstruction(true); + appendInstruction->setNext(followInstruction); + + if (followInstruction) + { + followInstruction->setPrev(appendInstruction); + } + + if (prevAppendInstruction) + cg()->setAppendInstruction(prevAppendInstruction); + } } if 
(cg()->canEmitBreakOnDFSet())