Skip to content

Commit

Permalink
Sampling allocation bytes precisely without compromising the performance
Browse files Browse the repository at this point in the history
In order to sample heap allocation bytes precisely without
compromising performance, we made the changes below.

instrumentableAllocateHook and
VM_OBJECT_ALLOCATE_WITHIN_THRESHOLD are still handled by disabling inline
allocation
Sampling for the tracepoint is still handled during out-of-line allocation
Sampling for JEP331 is handled via setTLHSamplingTop(size)

Use a fake heap top instead of a fake heap alloc to disable inline
allocation (realHeapAlloc-->realHeapTop,
set/getRealAlloc()-->set/getRealTop(), getRealSize(), getUsedSize())
Use a fake heap top to force out-of-line allocation at the sampling threshold
for sampling heap allocation (setTLHSamplingTop()/resetTLHSamplingTop())
setTLHSamplingTop(size) is only called in the 3 cases below:
	1, sampling threshold has been changed via GC-VM api
j9gc_set_allocation_sampling_interval()
	2, TLH is refreshed
	3, after sampling is done

The trace allocation byte count includes bytes allocated inside the TLH
cache before flushing (_stats.bytesAllocated(true),
stats->_tlhAllocatedUsed)
traceAllocationByte for Health
Center (_oolTraceAllocationBytesForTracepoint,
oolObjectSamplingBytesGranularityForTracepoint) and traceAllocationByte
for JEP331 (_traceAllocationBytesForHook,
objectSamplingBytesGranularityForHook) are handled independently


Signed-off-by: Lin Hu <linhu@ca.ibm.com>
  • Loading branch information
LinHu2016 committed Jun 2, 2020
1 parent 16d5a51 commit 52e91cf
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 45 deletions.
29 changes: 25 additions & 4 deletions example/glue/EnvironmentDelegate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,23 +237,44 @@ class MM_EnvironmentDelegate

#if defined (OMR_GC_THREAD_LOCAL_HEAP)
/**
* Disable inline TLH allocates by hiding the real heap allocation address from
* JIT/Interpreter in realHeapAlloc and setting heapALloc == HeapTop so TLH
* Disable inline TLH allocation by hiding the real heap top address from
* JIT/Interpreter in realHeapTop and setting heapTop == heapAlloc so the TLH
* looks full.
*
*/
void disableInlineTLHAllocate() {}

/**
* Re-enable inline TLH allocate by restoring heapAlloc from realHeapAlloc
* Re-enable inline TLH allocate by restoring heapTop from realHeapTop
*/
void enableInlineTLHAllocate() {}

/**
* Determine if inline TLH allocate is enabled; its enabled if realheapAlloc is NULL.
* Determine if inline TLH allocate is enabled; it is enabled if realHeapTop is NULL.
* @return TRUE if inline TLH allocates currently enabled for this thread; FALSE otherwise
*/
bool isInlineTLHAllocateEnabled() { return false; }

/**
* Set the TLH sampling top: hide the real heap top address from the
* JIT/Interpreter in realHeapTop and set HeapTop = (HeapAlloc + size) when
* size < (HeapTop - HeapAlloc), so that an out-of-line allocation happens once
* the TLH sampling top is reached.
* If size >= (HeapTop - HeapAlloc), resetTLHSamplingTop() applies instead.
*
* The example glue provides an empty body here (no-op).
*
* @param size the number of bytes to the next sampling point
*/
void setTLHSamplingTop(uintptr_t size) {}

/**
* Restore heapTop from realHeapTop if realHeapTop != NULL.
*
* The example glue provides an empty body here (no-op).
*/
void resetTLHSamplingTop() {}

/**
* Retrieve the number of bytes allocated inside the TLH cache.
*
* @return (heapAlloc - heapBase) is the documented contract; the example glue
*         body shown here always returns 0
*/
uintptr_t getAllocatedSizeInsideTLH() { return 0; }
#endif /* OMR_GC_THREAD_LOCAL_HEAP */

MM_EnvironmentDelegate()
Expand Down
4 changes: 2 additions & 2 deletions example/glue/LanguageThreadLocalHeap.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2014, 2016 IBM Corp. and others
* Copyright (c) 2014, 2020 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -28,7 +28,7 @@

typedef struct LanguageThreadLocalHeapStruct {
uint8_t* heapBase;
uint8_t* realHeapAlloc;
uint8_t* realHeapTop;
uintptr_t objectFlags;
uintptr_t refreshSize;
void* memorySubSpace;
Expand Down
37 changes: 31 additions & 6 deletions gc/base/EnvironmentBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ class MM_EnvironmentBase : public MM_BaseVirtual

MM_FreeEntrySizeClassStats _freeEntrySizeClassStats; /**< GC thread local statistics structure for heap free entry size (sizeClass) distribution */

uintptr_t _oolTraceAllocationBytes; /**< Tracks the bytes allocated since the last ool object trace */
uintptr_t _traceAllocationBytes; /**< Tracks the bytes allocated since the last object trace include ool and allocation is completed from TLH */
uintptr_t _oolTraceAllocationBytes; /**< Tracks the bytes allocated since the last ool object trace for Tracepoint */
uintptr_t _traceAllocationBytes; /**< Tracks the bytes allocated since the last ool object trace for Hooks ( only record ool allocation bytes and flushed TLH size) */
uintptr_t _traceAllocationAdjustmentBytes; /**< Keeps the bytes, in multiples of the sampling threshold, for the last object trace (includes allocation bytes inside TLH) */

uintptr_t approxScanCacheCount; /**< Local copy of approximate entries in global Cache Scan List. Updated upon allocation of new cache. */

Expand Down Expand Up @@ -526,23 +527,45 @@ class MM_EnvironmentBase : public MM_BaseVirtual

#if defined (OMR_GC_THREAD_LOCAL_HEAP)
/**
* Disable inline TLH allocates by hiding the real heap allocation address from
* JIT/Interpreter in realHeapAlloc and setting heapALloc == HeapTop so TLH
* Disable inline TLH allocation by hiding the real heap top address from
* JIT/Interpreter in realHeapTop and setting heapTop == heapAlloc so the TLH
* looks full.
*
*/
void disableInlineTLHAllocate() { _delegate.disableInlineTLHAllocate(); }

/**
* Re-enable inline TLH allocate by restoring heapAlloc from realHeapAlloc
* Re-enable inline TLH allocate by restoring heapTop from realHeapTop
*/
void enableInlineTLHAllocate() { _delegate.enableInlineTLHAllocate(); }

/**
* Determine if inline TLH allocate is enabled; its enabled if realheapAlloc is NULL.
* Determine if inline TLH allocate is enabled; it is enabled if realHeapTop is NULL.
* @return TRUE if inline TLH allocates currently enabled for this thread; FALSE otherwise
*/
bool isInlineTLHAllocateEnabled() { return _delegate.isInlineTLHAllocateEnabled(); }

/**
* Set the TLH sampling top: hide the real heap top address from the
* JIT/Interpreter in realHeapTop and set HeapTop = (HeapAlloc + size) when
* size < (HeapTop - HeapAlloc), so that an out-of-line allocation happens once
* the TLH sampling top is reached.
* If size >= (HeapTop - HeapAlloc), resetTLHSamplingTop() applies instead.
*
* This method only forwards the request to _delegate.
*
* @param size the number of bytes to the next sampling point
*/
void setTLHSamplingTop(uintptr_t size) { _delegate.setTLHSamplingTop(size); }

/**
* Restore heapTop from realHeapTop if realHeapTop != NULL.
*
* This method only forwards the request to _delegate.
*/
void resetTLHSamplingTop() { _delegate.resetTLHSamplingTop(); }

/**
* Retrieve the number of bytes allocated inside the TLH cache.
*
* This method only forwards the request to _delegate.
*
* @return (heapAlloc - heapBase), as reported by _delegate
*/
uintptr_t getAllocatedSizeInsideTLH() { return _delegate.getAllocatedSizeInsideTLH(); }

#endif /* OMR_GC_THREAD_LOCAL_HEAP */

MMINLINE uintptr_t getWorkUnitIndex() { return _workUnitIndex; }
Expand Down Expand Up @@ -666,6 +689,7 @@ class MM_EnvironmentBase : public MM_BaseVirtual
,_freeEntrySizeClassStats()
,_oolTraceAllocationBytes(0)
,_traceAllocationBytes(0)
,_traceAllocationAdjustmentBytes(0)
,approxScanCacheCount(0)
,_activeValidator(NULL)
,_lastSyncPointReached(NULL)
Expand Down Expand Up @@ -719,6 +743,7 @@ class MM_EnvironmentBase : public MM_BaseVirtual
,_freeEntrySizeClassStats()
,_oolTraceAllocationBytes(0)
,_traceAllocationBytes(0)
,_traceAllocationAdjustmentBytes(0)
,approxScanCacheCount(0)
,_activeValidator(NULL)
,_lastSyncPointReached(NULL)
Expand Down
10 changes: 10 additions & 0 deletions gc/base/GCExtensionsBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ class MM_GCExtensionsBase : public MM_BaseVirtual {
bool doOutOfLineAllocationTrace;
bool doFrequentObjectAllocationSampling; /**< Whether to track object allocations*/
uintptr_t oolObjectSamplingBytesGranularity; /**< How often (in bytes) we do an ool allocation trace */
uintptr_t objectSamplingBytesGranularity; /**< How often (in bytes) we do an allocation trace (for triggering J9HOOK_MM_OBJECT_ALLOCATION_SAMPLING) */

uintptr_t frequentObjectAllocationSamplingRate; /**< # bytes to sample / # bytes allocated */
MM_FrequentObjectsStats* frequentObjectsStats;
uint32_t frequentObjectAllocationSamplingDepth; /**< # of frequent objects we'd like to report */
Expand Down Expand Up @@ -1307,6 +1309,13 @@ class MM_GCExtensionsBase : public MM_BaseVirtual {
#endif /* defined(OMR_GC_CONCURRENT_SCAVENGER) */
}

/**
* Report whether inline allocation has to be disabled.
*
* @return true if any of fvtest_disableInlineAllocation,
*         instrumentableAllocateHookEnabled or
*         disableInlineCacheForAllocationThreshold is set; false otherwise
*/
MMINLINE bool
needDisableInlineAllocation() {
	/* Each flag on its own is sufficient to force the answer to true. */
	if (fvtest_disableInlineAllocation) {
		return true;
	}
	if (instrumentableAllocateHookEnabled) {
		return true;
	}
	if (disableInlineCacheForAllocationThreshold) {
		return true;
	}
	return false;
}

MM_GCExtensionsBase()
: MM_BaseVirtual()
Expand Down Expand Up @@ -1388,6 +1397,7 @@ class MM_GCExtensionsBase : public MM_BaseVirtual {
, doOutOfLineAllocationTrace(true) /* Tracing after ever x bytes allocated per thread. Enabled by default. */
, doFrequentObjectAllocationSampling(false) /* Finds most frequently allocated classes. Disabled by default. */
, oolObjectSamplingBytesGranularity(16*1024*1024) /* Default granularity set to 16M (shows <1% perf loss). */
, objectSamplingBytesGranularity(UDATA_MAX) /* default UDATA_MAX (disabled) */
, frequentObjectAllocationSamplingRate(100)
, frequentObjectsStats(NULL)
, frequentObjectAllocationSamplingDepth(0)
Expand Down
22 changes: 9 additions & 13 deletions gc/base/TLHAllocationInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ MM_TLHAllocationInterface::allocateObject(MM_EnvironmentBase *env, MM_AllocateDe
{
void *result = NULL;
MM_AllocationContext *ac = env->getAllocationContext();
_bytesAllocatedBase = _stats.bytesAllocated();
_bytesAllocatedBase = _stats.bytesAllocated(true);

if (NULL != ac) {
/* ensure that we are allowed to use the AI in this configuration in the Tarok case */
Expand Down Expand Up @@ -200,21 +200,17 @@ MM_TLHAllocationInterface::allocateObject(MM_EnvironmentBase *env, MM_AllocateDe

}

if (NULL != result) {
uintptr_t sizeInBytesAllocated = allocDescription->getContiguousBytes();
/* Increment by bytes allocated */
env->_traceAllocationBytes += sizeInBytesAllocated;

if (!allocDescription->isCompletedFromTlh()) {
if ((NULL != result) && !allocDescription->isCompletedFromTlh()) {
#if defined(OMR_GC_OBJECT_ALLOCATION_NOTIFY)
env->objectAllocationNotify((omrobjectptr_t)result);
env->objectAllocationNotify((omrobjectptr_t)result);
#endif /* OMR_GC_OBJECT_ALLOCATION_NOTIFY */
_stats._allocationBytes += sizeInBytesAllocated;
_stats._allocationCount += 1;
}
_stats._allocationBytes += allocDescription->getContiguousBytes();
_stats._allocationCount += 1;
}
env->_oolTraceAllocationBytes += (_stats.bytesAllocated() - _bytesAllocatedBase); /* Increment by bytes allocated */

uintptr_t sizeInBytesAllocated = (_stats.bytesAllocated(true) - _bytesAllocatedBase);
env->_oolTraceAllocationBytes += sizeInBytesAllocated; /* Increment by bytes allocated for Trace */
env->_traceAllocationBytes += sizeInBytesAllocated; /* Increment by bytes allocated for Hook */
return result;
}

Expand Down Expand Up @@ -287,7 +283,7 @@ MM_TLHAllocationInterface::flushCache(MM_EnvironmentBase *env)

#if defined(OMR_GC_THREAD_LOCAL_HEAP)
if (!_owningEnv->isInlineTLHAllocateEnabled()) {
/* Clear out realHeapAlloc field; tlh code below will take care of rest */
/* Clear out realHeapTop field; tlh code below will take care of rest */
_owningEnv->enableInlineTLHAllocate();
}
#endif /* OMR_GC_THREAD_LOCAL_HEAP */
Expand Down
18 changes: 13 additions & 5 deletions gc/base/TLHAllocationSupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ MM_TLHAllocationSupport::clear(MM_EnvironmentBase *env)

/* Any previous cache to clear ? */
if (NULL != memoryPool) {
memoryPool->abandonTlhHeapChunk(getRealAlloc(), getTop());
memoryPool->abandonTlhHeapChunk(getAlloc(), getRealTop());
reportClearCache(env);
}
wipeTLH(env);
Expand Down Expand Up @@ -172,17 +172,19 @@ MM_TLHAllocationSupport::refresh(MM_EnvironmentBase *env, MM_AllocateDescription

MM_AllocationStats *stats = _objectAllocationInterface->getAllocationStats();

stats->_tlhDiscardedBytes += getSize();
stats->_tlhDiscardedBytes += getRealSize();
uintptr_t usedSize = getUsedSize();
stats->_tlhAllocatedUsed += usedSize;

/* Try to cache the current TLH */
if (NULL != getRealAlloc() && getSize() >= tlhMinimumSize) {
if ((NULL != getRealTop()) && (getRealSize() >= tlhMinimumSize)) {
/* Cache the current TLH because it is bigger than the minimum size */
MM_HeapLinkedFreeHeaderTLH* newCache = (MM_HeapLinkedFreeHeaderTLH*)getRealAlloc();
MM_HeapLinkedFreeHeaderTLH* newCache = (MM_HeapLinkedFreeHeaderTLH*)getAlloc();

#if defined(OMR_VALGRIND_MEMCHECK)
valgrindMakeMemUndefined((uintptr_t)newCache, sizeof(MM_HeapLinkedFreeHeaderTLH));
#endif /* defined(OMR_VALGRIND_MEMCHECK) */
newCache->setSize(getSize());
newCache->setSize(getRealSize());
newCache->_memoryPool = getMemoryPool();
newCache->_memorySubSpace = getMemorySubSpace();
newCache->setNext(_abandonedList, compressed);
Expand Down Expand Up @@ -263,6 +265,12 @@ MM_TLHAllocationSupport::refresh(MM_EnvironmentBase *env, MM_AllocateDescription
}

if (didRefresh) {

uintptr_t samplingBytesGranularity = env->getExtensions()->objectSamplingBytesGranularity;
if (!extensions->needDisableInlineAllocation() && (UDATA_MAX != samplingBytesGranularity)) {
uintptr_t traceBytes = (env->_traceAllocationBytes + usedSize) % samplingBytesGranularity;
env->setTLHSamplingTop(samplingBytesGranularity - traceBytes);
}
/*
* TLH was refreshed however it might be already flushed in GC
* Some special features (like Prepare Heap For Walk called by GC check)
Expand Down
26 changes: 16 additions & 10 deletions gc/base/TLHAllocationSupport.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,25 @@ class MM_TLHAllocationSupport
MMINLINE void *getBase() { return (void *)_tlh->heapBase; };
MMINLINE void setBase(void *basePtr) { _tlh->heapBase = (uint8_t *)basePtr; };

/* CMVC 143597: setRealAlloc and getRealAlloc are fix to ensure we seal the
/* CMVC 143597: setRealTop and getRealTop are fix to ensure we seal the
* TLH properly under frequent hook and unhook of ObjectAllocInstrumentable, which toggles
* enable/disable of TLH. If TLH is disabled, realHeapAlloc holds the true TLH alloc ptr.
* enable/disable of TLH. If TLH is disabled, realHeapTop holds the true TLH top ptr.
*/
MMINLINE void setRealAlloc(void *realAllocPtr) { _tlh->realHeapAlloc = (uint8_t *)realAllocPtr; };
MMINLINE void *getRealAlloc()
MMINLINE void setRealTop(void *realTopPtr) { _tlh->realHeapTop = (uint8_t *)realTopPtr; };
MMINLINE void *getRealTop()
{
if (NULL != _tlh->realHeapAlloc) {
return _tlh->realHeapAlloc;
if (NULL != _tlh->realHeapTop) {
return _tlh->realHeapTop;
} else {
return *_pointerToHeapAlloc;
return *_pointerToHeapTop;
}
};

/* @return the number of bytes still available for allocation in the TLH, measured from the alloc pointer up to the real top */
MMINLINE uintptr_t getRealSize()
{
	uintptr_t const realTopAddress = (uintptr_t)getRealTop();
	uintptr_t const allocAddress = (uintptr_t)getAlloc();
	return realTopAddress - allocAddress;
};
/* @return the number of bytes already used in the TLH, measured from the base up to the alloc pointer */
MMINLINE uintptr_t getUsedSize()
{
	uintptr_t const allocAddress = (uintptr_t)getAlloc();
	uintptr_t const baseAddress = (uintptr_t)getBase();
	return allocAddress - baseAddress;
};

MMINLINE void *getAlloc() { return (void *) *_pointerToHeapAlloc; };
MMINLINE void setAlloc(void *allocPtr) { *_pointerToHeapAlloc = (uint8_t *)allocPtr; };
MMINLINE void *getTop() { return (void *) *_pointerToHeapTop; };
Expand Down Expand Up @@ -145,14 +151,14 @@ class MM_TLHAllocationSupport
MMINLINE void wipeTLH(MM_EnvironmentBase *env)
{
#if defined(OMR_GC_OBJECT_ALLOCATION_NOTIFY)
objectAllocationNotify(env, _tlh->heapBase, getRealAlloc());
objectAllocationNotify(env, _tlh->heapBase, getAlloc());
#endif /* OMR_GC_OBJECT_ALLOCATION_NOTIFY */
#if defined(OMR_GC_OBJECT_MAP)
/* Mark all newly allocated objects from the TLH as valid objects */
markValidObjectForRange(env, _tlh->heapBase, getRealAlloc());
markValidObjectForRange(env, _tlh->heapBase, getAlloc());
#endif
setupTLH(env, NULL, NULL, NULL, NULL);
setRealAlloc(NULL);
setRealTop(NULL);
}

#if defined(OMR_GC_OBJECT_ALLOCATION_NOTIFY)
Expand Down
4 changes: 3 additions & 1 deletion gc/stats/AllocationStats.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2016 IBM Corp. and others
* Copyright (c) 1991, 2020 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -30,6 +30,7 @@ MM_AllocationStats::clear()
_tlhRefreshCountFresh = 0;
_tlhRefreshCountReused = 0;
_tlhAllocatedFresh = 0;
_tlhAllocatedUsed = 0;
_tlhAllocatedReused = 0;
_tlhRequestedBytes = 0;
_tlhDiscardedBytes = 0;
Expand All @@ -54,6 +55,7 @@ MM_AllocationStats::merge(MM_AllocationStats *stats)
MM_AtomicOperations::add(&_tlhRefreshCountFresh, stats->_tlhRefreshCountFresh);
MM_AtomicOperations::add(&_tlhRefreshCountReused, stats->_tlhRefreshCountReused);
MM_AtomicOperations::add(&_tlhAllocatedFresh, stats->_tlhAllocatedFresh);
MM_AtomicOperations::add(&_tlhAllocatedUsed, stats->_tlhAllocatedUsed);
MM_AtomicOperations::add(&_tlhRequestedBytes, stats->_tlhRequestedBytes);
MM_AtomicOperations::add(&_tlhDiscardedBytes, stats->_tlhDiscardedBytes);
MM_AtomicOperations::add(&_tlhAllocatedReused, stats->_tlhAllocatedReused);
Expand Down
18 changes: 14 additions & 4 deletions gc/stats/AllocationStats.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2016 IBM Corp. and others
* Copyright (c) 1991, 2020 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -37,6 +37,7 @@ class MM_AllocationStats : public MM_Base
uintptr_t _tlhRefreshCountFresh; /**< Number of refreshes where fresh memory was allocated. */
uintptr_t _tlhRefreshCountReused; /**< Number of refreshes where TLHs were reused. */
uintptr_t _tlhAllocatedFresh; /**< The amount of memory allocated fresh out of the heap. */
uintptr_t _tlhAllocatedUsed; /**< The amount of used TLH memory that has been flushed. */
uintptr_t _tlhAllocatedReused; /**< The amount of memory allocated from reused TLHs. */
uintptr_t _tlhRequestedBytes; /**< The amount of memory requested for refreshes. */
uintptr_t _tlhDiscardedBytes; /**< The amount of memory from discarded TLHs. */
Expand All @@ -59,14 +60,22 @@ class MM_AllocationStats : public MM_Base

#if defined(OMR_GC_THREAD_LOCAL_HEAP)
uintptr_t tlhBytesAllocated() { return _tlhAllocatedFresh - _tlhDiscardedBytes; }
uintptr_t tlhBytesAllocatedUsed() { return _tlhAllocatedUsed; }
uintptr_t nontlhBytesAllocated() { return _allocationBytes; }
#endif

uintptr_t bytesAllocated(){
/* return bytesAllocated includes new refreshed TLH, if used == false(default)
* return bytesAllocated (but does not include new refreshed TLH), if used == true.
*/
uintptr_t bytesAllocated(bool used = false) {
uintptr_t totalBytesAllocated = 0;

#if defined(OMR_GC_THREAD_LOCAL_HEAP)
totalBytesAllocated += tlhBytesAllocated();
#if defined(OMR_GC_THREAD_LOCAL_HEAP)
if (used) {
totalBytesAllocated += tlhBytesAllocatedUsed();
} else {
totalBytesAllocated += tlhBytesAllocated();
}
totalBytesAllocated += nontlhBytesAllocated();
#else
totalBytesAllocated += _allocationBytes;
Expand All @@ -80,6 +89,7 @@ class MM_AllocationStats : public MM_Base
_tlhRefreshCountFresh(0),
_tlhRefreshCountReused(0),
_tlhAllocatedFresh(0),
_tlhAllocatedUsed(0),
_tlhAllocatedReused(0),
_tlhRequestedBytes(0),
_tlhDiscardedBytes(0),
Expand Down

0 comments on commit 52e91cf

Please sign in to comment.