Skip to content

Commit

Permalink
Sampling allocation bytes precisely without compromising the performance
Browse files Browse the repository at this point in the history
In order to sample heap allocation bytes precisely without
compromising performance, we make the changes below.

Handling of instrumentableAllocateHook and
VM_OBJECT_ALLOCATE_WITHIN_THRESHOLD is still done by disabling inline
allocation
Sampling for the tracepoint is still handled during out-of-line allocation
Sampling for JEP331 is handled via setTLHSamplingTop(size)

Using fake Heap Top instead of fake Heap Alloc for disabling inline
allocation (realHeapAlloc-->realHeapTop,
set/getRealAlloc()-->set/getRealTop(), getRealSize(), getUsedSize())
Using fake Heap Top to force out-of-line allocation at the sampling threshold
for sampling heap allocation (setTLHSamplingTop()/resetTLHSamplingTop())
setTLHSamplingTop(size) are only called in the below 3 cases
	1, sampling threshold has been changed via GC-VM api
j9gc_set_allocation_sampling_interval()
	2, TLH is refreshed
	3, after sampling is done

The trace allocation byte count includes bytes allocated inside the TLH
cache before flushing (_stats.bytesAllocated(true),
stats->_tlhAllocatedUsed)
Handle traceAllocationByte for Health
Center(_oolTraceAllocationBytesForTracepoint,
oolObjectSamplingBytesGranularityForTracepoint) and traceAllocationByte
for JEP331(_traceAllocationBytesForHook,
objectSamplingBytesGranularityForHook) independently

Signed-off-by: Lin Hu <linhu@ca.ibm.com>
  • Loading branch information
LinHu2016 committed Jun 4, 2020
1 parent 6efef8f commit d2dd559
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 58 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 1991, 2018 IBM Corp. and others
* Copyright (c) 1991, 2020 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -75,9 +75,9 @@ protected GCObjectHeapIteratorAddressOrderedList_V1(U8Pointer base, U8Pointer to
excludedRangeList.add(new U8Pointer[] {heapAlloc, heapTop});
} else {
/* Might be an instrumented VM */
U8Pointer realHeapAlloc = adjustedToRange(vmThread.allocateThreadLocalHeap().realHeapAlloc(), base, top);
if(realHeapAlloc.notNull() && isSomethingToAdd(realHeapAlloc, heapTop)) {
excludedRangeList.add(new U8Pointer[] {realHeapAlloc, heapTop});
U8Pointer realHeapTop = adjustedToRange(vmThread.allocateThreadLocalHeap().realHeapTop(), base, top);
if(realHeapTop.notNull() && isSomethingToAdd(heapAlloc, realHeapTop)) {
excludedRangeList.add(new U8Pointer[] {heapAlloc, realHeapTop});
}
}
}
Expand All @@ -91,9 +91,9 @@ protected GCObjectHeapIteratorAddressOrderedList_V1(U8Pointer base, U8Pointer to
excludedRangeList.add(new U8Pointer[] {heapAlloc, heapTop});
} else {
/* Might be an instrumented VM */
U8Pointer realHeapAlloc = adjustedToRange(vmThread.nonZeroAllocateThreadLocalHeap().realHeapAlloc(), base, top);
if(realHeapAlloc.notNull() && isSomethingToAdd(realHeapAlloc, heapTop)) {
excludedRangeList.add(new U8Pointer[] {realHeapAlloc, heapTop});
U8Pointer realHeapTop = adjustedToRange(vmThread.nonZeroAllocateThreadLocalHeap().realHeapTop(), base, top);
if(realHeapTop.notNull() && isSomethingToAdd(heapAlloc, realHeapTop)) {
excludedRangeList.add(new U8Pointer[] {heapAlloc, realHeapTop});
}
}
}
Expand Down
15 changes: 10 additions & 5 deletions runtime/gc_base/modronapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -866,9 +866,9 @@ j9gc_allocation_threshold_changed(J9VMThread *currentThread)
* j9gc_set_allocation_sampling_interval(vm, (UDATA)4096);
* To trigger an event for every object allocation:
* j9gc_set_allocation_sampling_interval(vm, (UDATA)0);
* The initial MM_GCExtensions::oolObjectSamplingBytesGranularity value is 16M
* or set by command line option "-Xgc:allocationSamplingGranularity".
* By default, the sampling interval is going to be set to 512 KB.
* To disable allocation sampling
* j9gc_set_allocation_sampling_interval(vm, UDATA_MAX);
* The initial MM_GCExtensionsBase::objectSamplingBytesGranularity value is UDATA_MAX.
*
* @parm[in] vm The J9JavaVM
* @parm[in] samplingInterval The allocation sampling interval.
Expand All @@ -878,10 +878,15 @@ j9gc_set_allocation_sampling_interval(J9JavaVM *vm, UDATA samplingInterval)
{
MM_GCExtensions *extensions = MM_GCExtensions::getExtensions(vm);
if (0 == samplingInterval) {
/* avoid (env->_oolTraceAllocationBytes) % 0 which could be undefined. */
/* avoid (env->_traceAllocationBytes) % 0 which could be undefined. */
samplingInterval = 1;
}
extensions->oolObjectSamplingBytesGranularity = samplingInterval;

if (samplingInterval != extensions->objectSamplingBytesGranularity) {
extensions->objectSamplingBytesGranularity = samplingInterval;
J9VMThread *currentThread = vm->internalVMFunctions->currentVMThread(vm);
j9gc_allocation_threshold_changed(currentThread);
}
}

/**
Expand Down
104 changes: 90 additions & 14 deletions runtime/gc_glue_java/EnvironmentDelegate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,58 +252,134 @@ MM_EnvironmentDelegate::forceOutOfLineVMAccess()

#if defined (J9VM_GC_THREAD_LOCAL_HEAP)
/**
* Disable inline TLH allocates by hiding the real heap allocation address from
* JIT/Interpreter in realHeapAlloc and setting heapALloc == HeapTop so TLH
* Disable inline TLH allocates by hiding the real heap top address from
* JIT/Interpreter in realHeapTop and setting heapTop == heapAlloc so TLH
* looks full.
*
*/
void
MM_EnvironmentDelegate::disableInlineTLHAllocate()
{
/* NOTE(review): this listing appears to interleave the pre-change (realHeapAlloc) and
 * post-change (realHeapTop) lines of the commit diff - confirm against the real file.
 */
J9ModronThreadLocalHeap *tlh = (J9ModronThreadLocalHeap *)&_vmThread->allocateThreadLocalHeap;
tlh->realHeapAlloc = _vmThread->heapAlloc;
_vmThread->heapAlloc = _vmThread->heapTop;
/* Stash the real top only when nothing is stashed yet, so an already saved value is never overwritten. */
if (NULL == tlh->realHeapTop) {
tlh->realHeapTop = _vmThread->heapTop;
}
/* Make the visible TLH look full: heapTop == heapAlloc leaves no free bytes. */
_vmThread->heapTop = _vmThread->heapAlloc;

#if defined(J9VM_GC_NON_ZERO_TLH)
/* Apply the same treatment to the non-zero TLH fields of the thread. */
tlh = (J9ModronThreadLocalHeap *)&_vmThread->nonZeroAllocateThreadLocalHeap;
tlh->realHeapAlloc = _vmThread->nonZeroHeapAlloc;
_vmThread->nonZeroHeapAlloc = _vmThread->nonZeroHeapTop;
/* Stash the real non-zero top only when nothing is stashed yet. */
if (NULL == tlh->realHeapTop) {
tlh->realHeapTop = _vmThread->nonZeroHeapTop;
}
_vmThread->nonZeroHeapTop = _vmThread->nonZeroHeapAlloc;
#endif /* defined(J9VM_GC_NON_ZERO_TLH) */
}

/**
* Re-enable inline TLH allocate by restoring heapAlloc from realHeapAlloc
* Re-enable inline TLH allocate by restoring heapTop from realHeapTop
*/
void
MM_EnvironmentDelegate::enableInlineTLHAllocate()
{
/* NOTE(review): this listing appears to interleave the pre-change (realHeapAlloc) and
 * post-change (realHeapTop) lines of the commit diff - confirm against the real file.
 */
J9ModronThreadLocalHeap *tlh = (J9ModronThreadLocalHeap *)&_vmThread->allocateThreadLocalHeap;
_vmThread->heapAlloc = tlh->realHeapAlloc;
tlh->realHeapAlloc = NULL;
/* Only restore when a real top was stashed; a NULL realHeapTop means heapTop is already the real one. */
if (NULL != tlh->realHeapTop) {
_vmThread->heapTop = tlh->realHeapTop;
/* Clearing the stash is what isInlineTLHAllocateEnabled() tests for. */
tlh->realHeapTop = NULL;
}

#if defined(J9VM_GC_NON_ZERO_TLH)
/* Restore the non-zero TLH top in the same way. */
tlh = (J9ModronThreadLocalHeap *)&_vmThread->nonZeroAllocateThreadLocalHeap;
_vmThread->nonZeroHeapAlloc = tlh->realHeapAlloc;
tlh->realHeapAlloc = NULL;
if (NULL != tlh->realHeapTop) {
_vmThread->nonZeroHeapTop = tlh->realHeapTop;
tlh->realHeapTop = NULL;
}
#endif /* defined(J9VM_GC_NON_ZERO_TLH) */
}

/**
* Determine if inline TLH allocate is enabled; its enabled if realheapAlloc is NULL.
* Determine if inline TLH allocate is enabled; it's enabled if realHeapTop is NULL.
* @return TRUE if inline TLH allocates currently enabled for this thread; FALSE otherwise
*/
bool
MM_EnvironmentDelegate::isInlineTLHAllocateEnabled()
{
/* NOTE(review): this listing appears to interleave the pre-change (realHeapAlloc) and
 * post-change (realHeapTop) lines of the commit diff - confirm against the real file.
 */
J9ModronThreadLocalHeap *tlh = (J9ModronThreadLocalHeap *)&_vmThread->allocateThreadLocalHeap;
bool result = (NULL == tlh->realHeapAlloc);
/* A NULL realHeapTop means no real top is stashed, i.e. heapTop has not been replaced. */
bool result = (NULL == tlh->realHeapTop);

#if defined(J9VM_GC_NON_ZERO_TLH)
/* Both the regular and the non-zero TLH must have no stashed top for the result to be true. */
tlh = (J9ModronThreadLocalHeap *)&_vmThread->nonZeroAllocateThreadLocalHeap;
result = result && (NULL == tlh->realHeapAlloc);
result = result && (NULL == tlh->realHeapTop);
#endif /* defined(J9VM_GC_NON_ZERO_TLH) */

return result;
}


/**
 * Install a fake heapTop "size" bytes past the current allocation pointer,
 * stashing the genuine top in realHeapTop. When no more than "size" bytes
 * remain before the genuine top, no fake top is installed and any stashed
 * top is restored instead.
 *
 * @param size the number of bytes to next sampling point
 */
void
MM_EnvironmentDelegate::setTLHSamplingTop(uintptr_t size)
{
	J9ModronThreadLocalHeap *tlh = (J9ModronThreadLocalHeap *)&_vmThread->allocateThreadLocalHeap;
	/* A non-NULL realHeapTop means heapTop currently holds a fake value. */
	bool topHidden = (NULL != tlh->realHeapTop);
	/* Bytes left between the allocation pointer and the genuine top. */
	uintptr_t remaining = topHidden
		? (tlh->realHeapTop - _vmThread->heapAlloc)
		: (_vmThread->heapTop - _vmThread->heapAlloc);

	if (remaining > size) {
		if (!topHidden) {
			tlh->realHeapTop = _vmThread->heapTop;
		}
		_vmThread->heapTop = _vmThread->heapAlloc + size;
	} else if (topHidden) {
		/* Sampling point lies at or beyond the genuine top: expose the genuine top again. */
		_vmThread->heapTop = tlh->realHeapTop;
		tlh->realHeapTop = NULL;
	}

#if defined(J9VM_GC_NON_ZERO_TLH)
	/* TODO: need to find better way set TLH sampling Top for NON ZERO case */
	tlh = (J9ModronThreadLocalHeap *)&_vmThread->nonZeroAllocateThreadLocalHeap;
	topHidden = (NULL != tlh->realHeapTop);
	remaining = topHidden
		? (tlh->realHeapTop - _vmThread->nonZeroHeapAlloc)
		: (_vmThread->nonZeroHeapTop - _vmThread->nonZeroHeapAlloc);

	if (remaining > size) {
		if (!topHidden) {
			tlh->realHeapTop = _vmThread->nonZeroHeapTop;
		}
		_vmThread->nonZeroHeapTop = _vmThread->nonZeroHeapAlloc + size;
	} else if (topHidden) {
		_vmThread->nonZeroHeapTop = tlh->realHeapTop;
		tlh->realHeapTop = NULL;
	}

#endif /* defined(J9VM_GC_NON_ZERO_TLH) */
}

void
MM_EnvironmentDelegate::resetTLHSamplingTop()
{
enableInlineTLHAllocate();
}

uintptr_t
MM_EnvironmentDelegate::getAllocatedSizeInsideTLH()
{
uintptr_t ret = 0;
J9ModronThreadLocalHeap *tlh = (J9ModronThreadLocalHeap *)&_vmThread->allocateThreadLocalHeap;
ret += _vmThread->heapAlloc - tlh->heapBase;
#if defined(J9VM_GC_NON_ZERO_TLH)
tlh = (J9ModronThreadLocalHeap *)&_vmThread->nonZeroAllocateThreadLocalHeap;
ret += _vmThread->nonZeroHeapAlloc - tlh->heapBase;
#endif /* defined(J9VM_GC_NON_ZERO_TLH) */
return ret;
}

#endif /* J9VM_GC_THREAD_LOCAL_HEAP */

14 changes: 7 additions & 7 deletions runtime/gc_glue_java/EnvironmentDelegate.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,20 @@ class MM_EnvironmentDelegate

#if defined (OMR_GC_THREAD_LOCAL_HEAP)
/**
* Disable inline TLH allocates by hiding the real heap allocation address from
* JIT/Interpreter in realHeapAlloc and setting heapALloc == HeapTop so TLH
* Disable inline TLH allocates by hiding the real heap top address from
* JIT/Interpreter in realHeapTop and setting heapTop == heapAlloc so TLH
* looks full.
*
*/
void disableInlineTLHAllocate();

/**
* Re-enable inline TLH allocate by restoring heapAlloc from realHeapAlloc
* Re-enable inline TLH allocate by restoring heapTop from realHeapTop
*/
void enableInlineTLHAllocate();

/**
* Determine if inline TLH allocate is enabled; it's enabled if realHeapTop is NULL.
* Determine if inline TLH allocate is enabled; its enabled if realheapTop is NULL.
* @return TRUE if inline TLH allocates currently enabled for this thread; FALSE otherwise
*/
bool isInlineTLHAllocateEnabled();
Expand All @@ -187,18 +187,18 @@ class MM_EnvironmentDelegate
*
* @param size the number of bytes to next sampling point
*/
void setTLHSamplingTop(uintptr_t size) {}
void setTLHSamplingTop(uintptr_t size);

/**
* Restore heapTop from realHeapTop if realHeapTop != NULL
*/
void resetTLHSamplingTop() {}
void resetTLHSamplingTop();

/**
* Retrieve allocation size inside TLH Cache.
* @return (heapAlloc - heapBase)
*/
uintptr_t getAllocatedSizeInsideTLH() { return 0; }
uintptr_t getAllocatedSizeInsideTLH();

#endif /* OMR_GC_THREAD_LOCAL_HEAP */

Expand Down
Loading

0 comments on commit d2dd559

Please sign in to comment.