Skip to content

Commit 0e98fc1

Browse files
author
Kim Barrett
committed
8253237: [REDO] Improve large object handling during evacuation
8253238: [REDO] Improve object array chunking test in G1's copy_to_survivor_space
8253236: [REDO] Investigate fast-path for scanning only objects with references during gc

Generate multiple partial array tasks for large objArrays.

Reviewed-by: sjohanss, ayang, tschatzl
1 parent d1f9b8a commit 0e98fc1

File tree

8 files changed

+439
-62
lines changed

8 files changed

+439
-62
lines changed

src/hotspot/share/gc/g1/g1ParScanThreadState.cpp

Lines changed: 77 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@
3131
#include "gc/g1/g1RootClosures.hpp"
3232
#include "gc/g1/g1StringDedup.hpp"
3333
#include "gc/g1/g1Trace.hpp"
34+
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
3435
#include "gc/shared/taskqueue.inline.hpp"
3536
#include "memory/allocation.inline.hpp"
3637
#include "oops/access.inline.hpp"
3738
#include "oops/oop.inline.hpp"
39+
#include "runtime/atomic.hpp"
3840
#include "runtime/prefetch.inline.hpp"
3941
#include "utilities/globalDefinitions.hpp"
4042
#include "utilities/macros.hpp"
@@ -50,6 +52,7 @@
5052
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
5153
G1RedirtyCardsQueueSet* rdcqs,
5254
uint worker_id,
55+
uint n_workers,
5356
size_t young_cset_length,
5457
size_t optional_cset_length)
5558
: _g1h(g1h),
@@ -70,6 +73,8 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
7073
_surviving_young_words(NULL),
7174
_surviving_words_length(young_cset_length + 1),
7275
_old_gen_is_full(false),
76+
_partial_objarray_chunk_size(ParGCArrayScanChunk),
77+
_partial_array_stepper(n_workers),
7378
_num_optional_regions(optional_cset_length),
7479
_numa(g1h->numa()),
7580
_obj_alloc_stat(NULL)
@@ -212,48 +217,60 @@ void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
212217

213218
assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
214219
assert(from_obj->is_objArray(), "must be obj array");
215-
objArrayOop from_obj_array = objArrayOop(from_obj);
216-
// The from-space object contains the real length.
217-
int length = from_obj_array->length();
218-
219220
assert(from_obj->is_forwarded(), "must be forwarded");
220-
oop to_obj = from_obj->forwardee();
221+
222+
oop to_obj = from_obj->forwardee();
221223
assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
222-
objArrayOop to_obj_array = objArrayOop(to_obj);
223-
// We keep track of the next start index in the length field of the
224-
// to-space object.
225-
int next_index = to_obj_array->length();
226-
assert(0 <= next_index && next_index < length,
227-
"invariant, next index: %d, length: %d", next_index, length);
228-
229-
int start = next_index;
230-
int end = length;
231-
int remainder = end - start;
232-
// We'll try not to push a range that's smaller than ParGCArrayScanChunk.
233-
if (remainder > 2 * ParGCArrayScanChunk) {
234-
end = start + ParGCArrayScanChunk;
235-
to_obj_array->set_length(end);
236-
// Push the remainder before we process the range in case another
237-
// worker has run out of things to do and can steal it.
224+
assert(to_obj->is_objArray(), "must be obj array");
225+
objArrayOop to_array = objArrayOop(to_obj);
226+
227+
PartialArrayTaskStepper::Step step
228+
= _partial_array_stepper.next(objArrayOop(from_obj),
229+
to_array,
230+
_partial_objarray_chunk_size);
231+
for (uint i = 0; i < step._ncreate; ++i) {
238232
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
239-
} else {
240-
assert(length == end, "sanity");
241-
// We'll process the final range for this object. Restore the length
242-
// so that the heap remains parsable in case of evacuation failure.
243-
to_obj_array->set_length(end);
244233
}
245234

246-
HeapRegion* hr = _g1h->heap_region_containing(to_obj);
235+
HeapRegion* hr = _g1h->heap_region_containing(to_array);
247236
G1ScanInYoungSetter x(&_scanner, hr->is_young());
248-
// Process indexes [start,end). It will also process the header
249-
// along with the first chunk (i.e., the chunk with start == 0).
250-
// Note that at this point the length field of to_obj_array is not
251-
// correct given that we are using it to keep track of the next
252-
// start index. oop_iterate_range() (thankfully!) ignores the length
253-
// field and only relies on the start / end parameters. It does
254-
// however return the size of the object which will be incorrect. So
255-
// we have to ignore it even if we wanted to use it.
256-
to_obj_array->oop_iterate_range(&_scanner, start, end);
237+
// Process claimed task. The length of to_array is not correct, but
238+
// fortunately the iteration ignores the length field and just relies
239+
// on start/end.
240+
to_array->oop_iterate_range(&_scanner,
241+
step._index,
242+
step._index + _partial_objarray_chunk_size);
243+
}
244+
245+
MAYBE_INLINE_EVACUATION
246+
void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
247+
oop from_obj,
248+
oop to_obj) {
249+
assert(from_obj->is_objArray(), "precondition");
250+
assert(from_obj->is_forwarded(), "precondition");
251+
assert(from_obj->forwardee() == to_obj, "precondition");
252+
assert(from_obj != to_obj, "should not be scanning self-forwarded objects");
253+
assert(to_obj->is_objArray(), "precondition");
254+
255+
objArrayOop to_array = objArrayOop(to_obj);
256+
257+
PartialArrayTaskStepper::Step step
258+
= _partial_array_stepper.start(objArrayOop(from_obj),
259+
to_array,
260+
_partial_objarray_chunk_size);
261+
262+
// Push any needed partial scan tasks. Pushed before processing the
263+
// initial chunk to allow other workers to steal while we're processing.
264+
for (uint i = 0; i < step._ncreate; ++i) {
265+
push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
266+
}
267+
268+
G1ScanInYoungSetter x(&_scanner, dest_attr.is_young());
269+
// Process the initial chunk. No need to process the type in the
270+
// klass, as it will already be handled by processing the built-in
271+
// module. The length of to_array is not correct, but fortunately
272+
// the iteration ignores that length field and relies on start/end.
273+
to_array->oop_iterate_range(&_scanner, 0, step._index);
257274
}
258275

259276
MAYBE_INLINE_EVACUATION
@@ -409,7 +426,10 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
409426
assert(region_attr.is_in_cset(),
410427
"Unexpected region attr type: %s", region_attr.get_type_str());
411428

412-
const size_t word_sz = old->size();
429+
// Get the klass once. We'll need it again later, and this avoids
430+
// re-decoding when it's compressed.
431+
Klass* klass = old->klass();
432+
const size_t word_sz = old->size_given_klass(klass);
413433

414434
uint age = 0;
415435
G1HeapRegionAttr dest_attr = next_region_attr(region_attr, old_mark, age);
@@ -476,6 +496,20 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
476496
obj->set_mark_raw(old_mark);
477497
}
478498

499+
// Most objects are not arrays, so do one array check rather than
500+
// checking for each array category for each object.
501+
if (klass->is_array_klass()) {
502+
if (klass->is_objArray_klass()) {
503+
start_partial_objarray(dest_attr, old, obj);
504+
} else {
505+
// Nothing needs to be done for typeArrays. Body doesn't contain
506+
// any oops to scan, and the type in the klass will already be handled
507+
// by processing the built-in module.
508+
assert(klass->is_typeArray_klass(), "invariant");
509+
}
510+
return obj;
511+
}
512+
479513
if (G1StringDedup::is_enabled()) {
480514
const bool is_from_young = region_attr.is_young();
481515
const bool is_to_young = dest_attr.is_young();
@@ -489,17 +523,10 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio
489523
obj);
490524
}
491525

492-
if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
493-
// We keep track of the next start index in the length field of
494-
// the to-space object. The actual length can be found in the
495-
// length field of the from-space object.
496-
arrayOop(obj)->set_length(0);
497-
do_partial_array(PartialArrayScanTask(old));
498-
} else {
499-
G1ScanInYoungSetter x(&_scanner, dest_attr.is_young());
500-
obj->oop_iterate_backwards(&_scanner);
501-
}
526+
G1ScanInYoungSetter x(&_scanner, dest_attr.is_young());
527+
obj->oop_iterate_backwards(&_scanner);
502528
return obj;
529+
503530
} else {
504531
_plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
505532
return forward_ptr;
@@ -518,7 +545,9 @@ G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id)
518545
assert(worker_id < _n_workers, "out of bounds access");
519546
if (_states[worker_id] == NULL) {
520547
_states[worker_id] =
521-
new G1ParScanThreadState(_g1h, _rdcqs, worker_id, _young_cset_length, _optional_cset_length);
548+
new G1ParScanThreadState(_g1h, _rdcqs,
549+
worker_id, _n_workers,
550+
_young_cset_length, _optional_cset_length);
522551
}
523552
return _states[worker_id];
524553
}

src/hotspot/share/gc/g1/g1ParScanThreadState.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "gc/g1/g1RemSet.hpp"
3333
#include "gc/g1/heapRegionRemSet.hpp"
3434
#include "gc/shared/ageTable.hpp"
35+
#include "gc/shared/partialArrayTaskStepper.hpp"
3536
#include "gc/shared/taskqueue.hpp"
3637
#include "memory/allocation.hpp"
3738
#include "oops/oop.hpp"
@@ -79,6 +80,9 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
7980
// Indicates whether in the last generation (old) there is no more space
8081
// available for allocation.
8182
bool _old_gen_is_full;
83+
// Size (in elements) of a partial objArray task chunk.
84+
int _partial_objarray_chunk_size;
85+
PartialArrayTaskStepper _partial_array_stepper;
8286

8387
G1RedirtyCardsQueue& redirty_cards_queue() { return _rdcq; }
8488
G1CardTable* ct() { return _ct; }
@@ -105,6 +109,7 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
105109
G1ParScanThreadState(G1CollectedHeap* g1h,
106110
G1RedirtyCardsQueueSet* rdcqs,
107111
uint worker_id,
112+
uint n_workers,
108113
size_t young_cset_length,
109114
size_t optional_cset_length);
110115
virtual ~G1ParScanThreadState();
@@ -157,6 +162,7 @@ class G1ParScanThreadState : public CHeapObj<mtGC> {
157162

158163
private:
159164
void do_partial_array(PartialArrayScanTask task);
165+
// Begin chunked scanning of a just-copied objArray: pushes any needed
// partial array scan tasks for from_obj, then scans the initial chunk of
// to_obj. dest_attr is the attribute of the region to_obj was copied into.
// Preconditions: from_obj is forwarded to to_obj, and both are objArrays.
void start_partial_objarray(G1HeapRegionAttr dest_attr, oop from_obj, oop to_obj);
160166

161167
HeapWord* allocate_copy_slow(G1HeapRegionAttr* dest_attr,
162168
oop old,
@@ -249,9 +255,6 @@ class G1ParScanThreadStateSet : public StackObj {
249255
G1ParScanThreadState* state_for_worker(uint worker_id);
250256

251257
const size_t* surviving_young_words() const;
252-
253-
private:
254-
G1ParScanThreadState* new_par_scan_state(uint worker_id, size_t young_cset_length);
255258
};
256259

257260
#endif // SHARE_GC_G1_G1PARSCANTHREADSTATE_HPP
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#include "precompiled.hpp"
26+
#include "gc/shared/partialArrayTaskStepper.hpp"
27+
#include "oops/arrayOop.hpp"
28+
#include "utilities/globalDefinitions.hpp"
29+
30+
static uint compute_task_limit(uint n_workers) {
31+
// Don't need more than n_workers tasks at a time. But allowing up to
32+
// that maximizes available parallelism.
33+
return n_workers;
34+
}
35+
36+
static uint compute_task_fanout(uint task_limit) {
37+
assert(task_limit > 0, "precondition");
38+
// There is a tradeoff between providing parallelism more quickly and
39+
// number of enqueued tasks. A constant fanout may be too slow when
40+
// parallelism (and so task_limit) is large. A constant fraction might
41+
// be overly eager. Using log2 attempts to balance between those.
42+
uint result = log2_uint(task_limit);
43+
// result must be > 0. result should be > 1 if task_limit > 1, to
44+
// provide some potentially parallel tasks. But don't just +1 to
45+
// avoid otherwise increasing rate of task generation.
46+
if (result < 2) ++result;
47+
return result;
48+
}
49+
50+
PartialArrayTaskStepper::PartialArrayTaskStepper(uint n_workers) :
51+
_task_limit(compute_task_limit(n_workers)),
52+
_task_fanout(compute_task_fanout(_task_limit))
53+
{}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
#ifndef SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
26+
#define SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP
27+
28+
#include "oops/arrayOop.hpp"
29+
#include "utilities/globalDefinitions.hpp"
30+
31+
// Helper for handling PartialArrayTasks.
32+
//
33+
// When an array is large, we want to split it up into chunks that can be
34+
// processed in parallel. Each task (implicitly) represents such a chunk.
35+
// We can enqueue multiple tasks at the same time. We want to enqueue
36+
// enough tasks to benefit from the available parallelism, while not so many
37+
// as to substantially expand the task queues.
38+
//
39+
// A task directly refers to the from-space array. The from-space array's
40+
// forwarding pointer refers to the associated to-space array, and its
41+
// length is the actual length. The to-space array's length field is used to
42+
// indicate processing progress. It is the starting index of the next chunk
43+
// to process, or equals the actual length when there are no more chunks to
44+
// be processed.
45+
class PartialArrayTaskStepper {
46+
public:
47+
PartialArrayTaskStepper(uint n_workers);
48+
49+
struct Step {
50+
int _index; // Array index for the step.
51+
uint _ncreate; // Number of new tasks to create.
52+
};
53+
54+
// Set to's length to the end of the initial chunk, which is the start of
55+
// the first partial task if the array is large enough to need splitting.
56+
// Returns a Step with _index being that index and _ncreate being the
57+
// initial number of partial tasks to enqueue.
58+
inline Step start(arrayOop from, arrayOop to, int chunk_size) const;
59+
60+
// Increment to's length by chunk_size to claim the next chunk. Returns a
61+
// Step with _index being the starting index of the claimed chunk and
62+
// _ncreate being the number of additional partial tasks to enqueue.
63+
// precondition: chunk_size must be the same as used to start the task sequence.
64+
inline Step next(arrayOop from, arrayOop to, int chunk_size) const;
65+
66+
class TestSupport; // For unit tests
67+
68+
private:
69+
// Limit on the number of partial array tasks to create for a given array.
70+
uint _task_limit;
71+
// Maximum number of new tasks to create when processing an existing task.
72+
uint _task_fanout;
73+
74+
// Split start/next into public part dealing with oops and private
75+
// impl dealing with lengths and pointers to lengths, for unit testing.
76+
// length is the actual length obtained from the from-space object.
77+
// to_length_addr is the address of the to-space object's length value.
78+
inline Step start_impl(int length, int* to_length_addr, int chunk_size) const;
79+
inline Step next_impl(int length, int* to_length_addr, int chunk_size) const;
80+
};
81+
82+
#endif // SHARE_GC_SHARED_PARTIALARRAYTASKSTEPPER_HPP

0 commit comments

Comments
 (0)