Skip to content

Commit bf74bf8

Browse files
authored
Cleanup JVMTI sampling implementation (#174)
1 parent c03c1c8 commit bf74bf8

File tree

9 files changed

+69
-147
lines changed

9 files changed

+69
-147
lines changed

ddprof-lib/src/main/cpp/arch.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@
1919

2020
#include <stddef.h>
2121

22+
#ifdef _LP64
23+
# define LP64_ONLY(code) code
24+
#else // !_LP64
25+
# define LP64_ONLY(code)
26+
#endif // _LP64
27+
2228
typedef unsigned char u8;
2329
typedef unsigned short u16;
2430
typedef unsigned int u32;

ddprof-lib/src/main/cpp/callTraceStorage.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include "os.h"
2020
#include <string.h>
2121

22+
#define COMMA ,
23+
2224
static const u32 INITIAL_CAPACITY = 65536;
2325
static const u32 CALL_TRACE_CHUNK = 8 * 1024 * 1024;
2426
static const u32 OVERFLOW_TRACE_ID = 0x7fffffff;
@@ -81,8 +83,7 @@ class LongHashTable {
8183
}
8284
};
8385

84-
CallTrace CallTraceStorage::_overflow_trace = {
85-
false, 1, {BCI_ERROR, (jmethodID) "storage_overflow"}};
86+
CallTrace CallTraceStorage::_overflow_trace = {false, 1, {BCI_ERROR, LP64_ONLY(0 COMMA) (jmethodID)"storage_overflow"}};
8687

8788
CallTraceStorage::CallTraceStorage() : _allocator(CALL_TRACE_CHUNK), _lock(0) {
8889
_current_table = LongHashTable::allocate(NULL, INITIAL_CAPACITY);

ddprof-lib/src/main/cpp/livenessTracker.cpp

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -67,22 +67,14 @@ void LivenessTracker::cleanup_table(bool forced) {
6767
if (target != i) {
6868
_table[target] = _table[i]; // will clone TrackingEntry at 'i'
6969
_table[i].ref = nullptr; // will nullify the original ref
70-
assert(_table[i].frames == _table[target].frames);
71-
_table[i].frames = nullptr; // will nullify the original frames
72-
assert(_table[target].frames != nullptr);
70+
_table[i].call_trace_id = 0;
7371
}
74-
assert(_table[target].ref != nullptr &&
75-
_table[target].frames != nullptr);
7672
_table[target].age += epoch_diff;
7773
} else {
7874
jweak tmpRef = _table[i].ref;
7975
_table[i].ref = nullptr;
8076
env->DeleteWeakGlobalRef(tmpRef);
81-
82-
jvmtiFrameInfo *tmpFrames = _table[i].frames;
83-
_table[i].frames = nullptr;
84-
assert(_table[i].ref == nullptr && _table[i].frames == nullptr);
85-
delete[] tmpFrames;
77+
_table[i].call_trace_id = 0;
8678
}
8779
}
8880

@@ -119,8 +111,6 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
119111
for (int i = 0; i < (sz = _table_size); i++) {
120112
jobject ref = env->NewLocalRef(_table[i].ref);
121113
if (ref != nullptr) {
122-
assert(_table[i].frames != nullptr);
123-
124114
if (tracked_thread_ids != nullptr) {
125115
tracked_thread_ids->insert(_table[i].tid);
126116
}
@@ -141,9 +131,7 @@ void LivenessTracker::flush_table(std::set<int> *tracked_thread_ids) {
141131
: 0;
142132
env->ReleaseStringUTFChars(name_str, name);
143133

144-
Profiler::instance()->recordExternalSample(
145-
1, _table[i].tid, _table[i].frames, _table[i].frames_size,
146-
/*truncated=*/false, BCI_LIVENESS, &event);
134+
Profiler::instance()->recordDeferredSample(_table[i].tid, _table[i].call_trace_id, BCI_LIVENESS, &event);
147135
}
148136

149137
env->DeleteLocalRef(ref);
@@ -292,8 +280,7 @@ Error LivenessTracker::initialize(Arguments &args) {
292280
}
293281

294282
void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
295-
jobject object, int num_frames,
296-
jvmtiFrameInfo *frames) {
283+
jobject object, u32 call_trace_id) {
297284
if (!_enabled) {
298285
// disabled
299286
return;
@@ -340,12 +327,7 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
340327
_table[idx].alloc = event;
341328
_table[idx].skipped = skipped;
342329
_table[idx].age = 0;
343-
_table[idx].frames_size = num_frames;
344-
_table[idx].frames = new jvmtiFrameInfo[_table[idx].frames_size];
345-
if (frames != nullptr) {
346-
memcpy(_table[idx].frames, frames,
347-
sizeof(jvmtiFrameInfo) * _table[idx].frames_size);
348-
}
330+
_table[idx].call_trace_id = call_trace_id;
349331
_table[idx].ctx = Contexts::get(tid);
350332
}
351333

ddprof-lib/src/main/cpp/livenessTracker.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ typedef struct TrackingEntry {
3232
jweak ref;
3333
AllocEvent alloc;
3434
double skipped;
35-
jint frames_size;
36-
jvmtiFrameInfo *frames;
35+
u32 call_trace_id;
3736
jint tid;
3837
jlong time;
3938
jlong age;
@@ -100,8 +99,7 @@ class LivenessTracker {
10099

101100
Error start(Arguments &args);
102101
void stop();
103-
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object,
104-
int num_frames, jvmtiFrameInfo *frames);
102+
void track(JNIEnv *env, AllocEvent &event, jint tid, jobject object, u32 call_trace_id);
105103
void flush(std::set<int> &tracked_thread_ids);
106104

107105
static void JNICALL GarbageCollectionFinish(jvmtiEnv *jvmti_env);

ddprof-lib/src/main/cpp/objectSampler.cpp

Lines changed: 5 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,7 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
6767
event._id = id;
6868
}
6969

70-
jint frames_size = 0;
71-
jvmtiFrameInfo *frames = nullptr;
72-
70+
u32 call_trace_id = 0;
7371
// we do record the details and stacktraces only for when recording
7472
// allocations or liveness
7573
if (_record_allocations || _record_liveness) {
@@ -78,31 +76,14 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
7876
? 1
7977
: 1 / (1 - exp(-size / (double)_interval)));
8078

81-
frames = new jvmtiFrameInfo[_max_stack_depth];
79+
call_trace_id = Profiler::instance()->recordJVMTISample(size, tid, thread, BCI_ALLOC, &event, !_record_allocations);
8280

83-
if (jvmti->GetStackTrace(thread, 0, _max_stack_depth, frames,
84-
&frames_size) != JVMTI_ERROR_NONE ||
85-
frames_size <= 0) {
86-
delete[] frames;
81+
if (call_trace_id == 0) {
8782
return;
8883
}
89-
90-
if (frames_size > 0) {
91-
std::set<jclass> classes;
92-
jclass method_class;
93-
for (int i = 0; i < frames_size; i++) {
94-
if (jvmti->GetMethodDeclaringClass(frames[i].method, &method_class) ==
95-
0) {
96-
classes.insert(method_class);
97-
}
98-
}
99-
}
10084
}
10185

10286
if (_record_allocations) {
103-
Profiler::instance()->recordExternalSample(
104-
size, tid, frames, frames_size, /*truncated=*/false, BCI_ALLOC, &event);
105-
10687
u64 current_samples = __sync_add_and_fetch(&_alloc_event_count, 1);
10788
// in order to lower the number of atomic reads from the timestamp variable
10889
// the check will be performed only each N samples
@@ -130,15 +111,10 @@ void ObjectSampler::recordAllocation(jvmtiEnv *jvmti, JNIEnv *jni,
130111
}
131112

132113
// Either we are recording liveness or tracking GC generations (lightweight
133-
// livenss samples)
114+
// liveness samples)
134115
if (_gc_generations || _record_liveness) {
135-
LivenessTracker::instance()->track(jni, event, tid, object, frames_size,
136-
frames);
116+
LivenessTracker::instance()->track(jni, event, tid, object, call_trace_id);
137117
}
138-
139-
// it's safe to delete frames - the liveness tracker keeps a full copy of the
140-
// frames and manages its own memory
141-
delete[] frames;
142118
}
143119

144120
Error ObjectSampler::check(Arguments &args) {

ddprof-lib/src/main/cpp/profiler.cpp

Lines changed: 39 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "profiler.h"
1919
#include "asyncSampleMutex.h"
20+
#include "common.h"
2021
#include "context.h"
2122
#include "counters.h"
2223
#include "ctimer.h"
@@ -548,51 +549,6 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames,
548549
return trace.frames - frames + 1;
549550
}
550551

551-
int Profiler::getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames,
552-
ASGCT_CallFrame *frames, int start_depth,
553-
int max_depth) {
554-
int num_frames;
555-
if (VM::jvmti()->GetStackTrace(NULL, start_depth, _max_stack_depth,
556-
jvmti_frames, &num_frames) == 0 &&
557-
num_frames > 0) {
558-
return convertFrames(jvmti_frames, frames, num_frames);
559-
}
560-
return 0;
561-
}
562-
563-
int Profiler::getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
564-
ASGCT_CallFrame *frames, int max_depth) {
565-
// We cannot call pure JVM TI here, because it assumes _thread_in_native
566-
// state, but allocation events happen in _thread_in_vm state, see
567-
// https://github.com/jvm-profiling-tools/java-profiler/issues/64
568-
JNIEnv *jni = VM::jni();
569-
if (jni == NULL) {
570-
return 0;
571-
}
572-
573-
JitWriteProtection jit(false);
574-
VMThread *vm_thread = VMThread::fromEnv(jni);
575-
int num_frames;
576-
if (VMStructs::_get_stack_trace(NULL, vm_thread, 0, max_depth, jvmti_frames,
577-
&num_frames) == 0 &&
578-
num_frames > 0) {
579-
return convertFrames(jvmti_frames, frames, num_frames);
580-
}
581-
return 0;
582-
}
583-
584-
inline int Profiler::convertFrames(jvmtiFrameInfo *jvmti_frames,
585-
ASGCT_CallFrame *frames, int num_frames) {
586-
// Convert to AsyncGetCallTrace format.
587-
// Note: jvmti_frames and frames may overlap.
588-
for (int i = 0; i < num_frames; i++) {
589-
jint bci = jvmti_frames[i].location;
590-
frames[i].method_id = jvmti_frames[i].method;
591-
frames[i].bci = bci;
592-
}
593-
return num_frames;
594-
}
595-
596552
void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
597553
NMethod *nmethod) {
598554
if (nmethod->isNMethod() && nmethod->isAlive()) {
@@ -634,10 +590,7 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
634590
}
635591
}
636592

637-
void Profiler::recordExternalSample(u64 counter, int tid,
638-
jvmtiFrameInfo *jvmti_frames,
639-
jint num_jvmti_frames, bool truncated,
640-
jint event_type, Event *event) {
593+
u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) {
641594
atomicInc(_total_samples);
642595

643596
u32 lock_index = getLockIndex(tid);
@@ -647,29 +600,50 @@ void Profiler::recordExternalSample(u64 counter, int tid,
647600
// Too many concurrent signals already
648601
atomicInc(_failures[-ticks_skipped]);
649602

650-
if (event_type == BCI_CPU && _cpu_engine == &perf_events) {
651-
// Need to reset PerfEvents ring buffer, even though we discard the
652-
// collected trace
653-
PerfEvents::resetBuffer(tid);
654-
}
655-
return;
603+
return 0;
656604
}
657605
u32 call_trace_id = 0;
658-
if (!_omit_stacktraces && jvmti_frames != nullptr) {
606+
if (!_omit_stacktraces) {
659607
ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames;
608+
jvmtiFrameInfo *jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;
660609

661610
int num_frames = 0;
662-
if (!_jfr.active() && BCI_ALLOC >= event_type && event_type >= BCI_PARK &&
663-
event->_id) {
664-
num_frames = makeFrame(frames, event_type, event->_id);
611+
612+
if (VM::jvmti()->GetStackTrace(thread, 0, _max_stack_depth, jvmti_frames, &num_frames) == JVMTI_ERROR_NONE && num_frames > 0) {
613+
// Convert to AsyncGetCallTrace format.
614+
// Note: jvmti_frames and frames may overlap.
615+
for (int i = 0; i < num_frames; i++) {
616+
jint bci = jvmti_frames[i].location;
617+
jmethodID mid = jvmti_frames[i].method;
618+
frames[i].method_id = mid;
619+
frames[i].bci = bci;
620+
// see https://github.com/async-profiler/async-profiler/pull/1090
621+
LP64_ONLY(frames[i].padding = 0;)
622+
}
665623
}
666624

667-
num_frames +=
668-
convertFrames(jvmti_frames, frames + num_frames, num_jvmti_frames);
625+
call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter);
626+
}
627+
if (!deferred) {
628+
_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);
629+
}
630+
631+
_locks[lock_index].unlock();
632+
return call_trace_id;
633+
}
669634

670-
call_trace_id =
671-
_call_trace_storage.put(num_frames, frames, truncated, counter);
635+
void Profiler::recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event) {
636+
atomicInc(_total_samples);
637+
638+
u32 lock_index = getLockIndex(tid);
639+
if (!_locks[lock_index].tryLock() &&
640+
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
641+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
642+
// Too many concurrent signals already
643+
atomicInc(_failures[-ticks_skipped]);
644+
return;
672645
}
646+
673647
_jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event);
674648

675649
_locks[lock_index].unlock();
@@ -1153,13 +1127,11 @@ Error Profiler::start(Arguments &args, bool reset) {
11531127
// (Re-)allocate calltrace buffers
11541128
if (_max_stack_depth != args._jstackdepth) {
11551129
_max_stack_depth = args._jstackdepth;
1156-
size_t buffer_size =
1157-
(_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) *
1158-
sizeof(CallTraceBuffer);
1130+
size_t nelem = _max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES;
11591131

11601132
for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
11611133
free(_calltrace_buffer[i]);
1162-
_calltrace_buffer[i] = (CallTraceBuffer *)malloc(buffer_size);
1134+
_calltrace_buffer[i] = (CallTraceBuffer*)calloc(nelem, sizeof(CallTraceBuffer));
11631135
if (_calltrace_buffer[i] == NULL) {
11641136
_max_stack_depth = 0;
11651137
return Error("Not enough memory to allocate stack trace buffers (try "

ddprof-lib/src/main/cpp/profiler.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,9 @@ const int RESERVED_FRAMES = 4;
5252

5353
enum EventMask { EM_CPU = 1 << 0, EM_WALL = 1 << 1, EM_ALLOC = 1 << 2 };
5454

55-
struct CallTraceBuffer {
55+
union CallTraceBuffer {
5656
ASGCT_CallFrame _asgct_frames[1];
57+
jvmtiFrameInfo _jvmti_frames[1];
5758
};
5859

5960
class FrameName;
@@ -138,12 +139,6 @@ class Profiler {
138139
int tid, StackContext *java_ctx, bool *truncated);
139140
int getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, int max_depth,
140141
StackContext *java_ctx, bool *truncated);
141-
int getJavaTraceJvmti(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
142-
int start_depth, int max_depth);
143-
int getJavaTraceInternal(jvmtiFrameInfo *jvmti_frames,
144-
ASGCT_CallFrame *frames, int max_depth);
145-
int convertFrames(jvmtiFrameInfo *jvmti_frames, ASGCT_CallFrame *frames,
146-
int num_frames);
147142
void fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
148143
NMethod *nmethod);
149144
void updateThreadName(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread,
@@ -223,9 +218,8 @@ class Profiler {
223218
ASGCT_CallFrame *frames);
224219
void recordSample(void *ucontext, u64 weight, int tid, jint event_type,
225220
u32 call_trace_id, Event *event);
226-
void recordExternalSample(u64 weight, int tid, jvmtiFrameInfo *jvmti_frames,
227-
jint num_jvmti_frames, bool truncated,
228-
jint event_type, Event *event);
221+
u32 recordJVMTISample(u64 weight, int tid, jthread thread, jint event_type, Event *event, bool deferred);
222+
void recordDeferredSample(int tid, u32 call_trace_id, jint event_type, Event *event);
229223
void recordExternalSample(u64 weight, int tid, int num_frames,
230224
ASGCT_CallFrame *frames, bool truncated,
231225
jint event_type, Event *event);

ddprof-lib/src/main/cpp/vmEntry.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include <jvmti.h>
2222

23+
#include "arch.h"
2324
#include "codeCache.h"
2425
#include "frame.h"
2526

@@ -62,9 +63,12 @@ enum ASGCT_Failure {
6263

6364
typedef struct {
6465
jint bci;
66+
// see https://github.com/async-profiler/async-profiler/pull/1090
67+
LP64_ONLY(jint padding;)
6568
jmethodID method_id;
6669
} ASGCT_CallFrame;
6770

71+
6872
typedef struct {
6973
JNIEnv *env;
7074
jint num_frames;

0 commit comments

Comments
 (0)