
Commit e845077

Refactor Space::acquire (#1401)
We extract portions of that function into separate functions or lambda expressions in order to make the main decision tree logic terse. We do not intend to change the logic of this function, but we fix some obvious bugs in this commit:

- It is a runtime error if we need GC but GC is not initialized. We use `panic!` instead of `assert!`.
- Now the value passed to `on_pending_allocation` always includes the metadata size.
1 parent 4f793dd commit e845077
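For orientation, the decision tree that `Space::acquire` reduces to after this change looks roughly like the following. This is a condensed sketch based on the diff below (the real code nests an `if let` inside the `else` branch and keeps its logging), not the verbatim method body.

// Sketch only: names and control flow are taken from the diff below.
if should_poll && self.get_gc_trigger().poll(false, Some(self.as_space())) {
    // A GC was triggered before we touched the page resource.
    self.not_acquiring(tls, alloc_options, pr, pages_reserved, false);
    Address::ZERO
} else if let Some(addr) = self.get_new_pages_and_initialize(tls, pages, pr, pages_reserved) {
    // Got the pages; the helper mmapped the memory and side metadata, updated the SFT, and zeroed if needed.
    addr
} else {
    // The page resource ran out of physical memory.
    self.not_acquiring(tls, alloc_options, pr, pages_reserved, true);
    Address::ZERO
}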

2 files changed: +174 -151 lines changed


src/memory_manager.rs

Lines changed: 3 additions & 1 deletion
@@ -545,7 +545,9 @@ pub fn gc_poll<VM: VMBinding>(mmtk: &MMTK<VM>, tls: VMMutatorThread) {
 
     if VM::VMCollection::is_collection_enabled() && mmtk.gc_trigger.poll(false, None) {
         debug!("Collection required");
-        assert!(mmtk.state.is_initialized(), "GC is not allowed here: collection is not initialized (did you call initialize_collection()?).");
+        if !mmtk.state.is_initialized() {
+            panic!("GC is not allowed here: collection is not initialized (did you call initialize_collection()?).");
+        }
         VM::VMCollection::block_for_gc(tls);
     }
 }
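The `assert!` that this hunk replaces is not compiled out in release builds, so the old and new forms abort in the same situations; the explicit `if`/`panic!` simply marks the uninitialized-collection case as a runtime error rather than an internal invariant. A minimal standalone sketch of the two forms, with a plain `initialized: bool` and hypothetical function names standing in for the real `mmtk.state.is_initialized()` check:

// Standalone sketch; `initialized` stands in for `mmtk.state.is_initialized()`.
fn check_old(initialized: bool) {
    // Old form: the check is phrased as an assertion.
    assert!(initialized, "GC is not allowed here: collection is not initialized.");
}

fn check_new(initialized: bool) {
    // New form: the error path is an explicit branch that panics at runtime.
    if !initialized {
        panic!("GC is not allowed here: collection is not initialized.");
    }
}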

src/policy/space.rs

Lines changed: 171 additions & 150 deletions
@@ -112,175 +112,196 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
         // - If tls is collector, we cannot attempt a GC.
         // - If gc is disabled, we cannot attempt a GC.
         // - If overcommit is allowed, we don't attempt a GC.
+        // FIXME: We should allow polling while also allowing over-committing.
+        // We should change the allocation interface.
         let should_poll = VM::VMActivePlan::is_mutator(tls)
             && VM::VMCollection::is_collection_enabled()
             && !alloc_options.on_fail.allow_overcommit();
-        // Is a GC allowed here? If we should poll but are not allowed to poll, we will panic.
-        // initialize_collection() has to be called so we know GC is initialized.
-        let allow_gc = should_poll
-            && self.common().global_state.is_initialized()
-            && alloc_options.on_fail.allow_gc();
 
         trace!("Reserving pages");
         let pr = self.get_page_resource();
         let pages_reserved = pr.reserve_pages(pages);
         trace!("Pages reserved");
         trace!("Polling ..");
 
+        // The actual decision tree.
         if should_poll && self.get_gc_trigger().poll(false, Some(self.as_space())) {
-            // Clear the request
-            pr.clear_request(pages_reserved);
+            self.not_acquiring(tls, alloc_options, pr, pages_reserved, false);
+            Address::ZERO
+        } else {
+            debug!("Collection not required");
 
-            // If we do not want GC on fail, just return zero.
-            if !alloc_options.on_fail.allow_gc() {
-                return Address::ZERO;
+            if let Some(addr) = self.get_new_pages_and_initialize(tls, pages, pr, pages_reserved) {
+                addr
+            } else {
+                self.not_acquiring(tls, alloc_options, pr, pages_reserved, true);
+                Address::ZERO
             }
+        }
+    }
 
-            // Otherwise do GC here
-            debug!("Collection required");
-            assert!(allow_gc, "GC is not allowed here: collection is not initialized (did you call initialize_collection()?).");
-
-            // Inform GC trigger about the pending allocation.
-            let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved);
-            let total_pages_reserved = pages_reserved + meta_pages_reserved;
-            self.get_gc_trigger()
-                .policy
-                .on_pending_allocation(total_pages_reserved);
+    /// Get new pages from the page resource, and do necessary initialization, including mmapping
+    /// and zeroing the memory.
+    ///
+    /// The caller must have reserved pages from the page resource. If successfully acquired pages
+    /// from the page resource, the reserved pages will be committed.
+    ///
+    /// Returns `None` if failed to acquire memory from the page resource. The caller should call
+    /// `pr.clear_request`.
+    fn get_new_pages_and_initialize(
+        &self,
+        tls: VMThread,
+        pages: usize,
+        pr: &dyn PageResource<VM>,
+        pages_reserved: usize,
+    ) -> Option<Address> {
+        // We need this lock: Otherwise, it is possible that one thread acquires pages in a new chunk, but not yet
+        // set SFT for it (in grow_space()), and another thread acquires pages in the same chunk, which is not
+        // a new chunk so grow_space() won't be called on it. The second thread could return a result in the chunk before
+        // its SFT is properly set.
+        // We need to minimize the scope of this lock for performance when we have many threads (mutator threads, or GC threads with copying allocators).
+        // See: https://github.com/mmtk/mmtk-core/issues/610
+        let lock = self.common().acquire_lock.lock().unwrap();
+
+        let Ok(res) = pr.get_new_pages(self.common().descriptor, pages_reserved, pages, tls) else {
+            return None;
+        };
 
-            VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator
+        debug!(
+            "Got new pages {} ({} pages) for {} in chunk {}, new_chunk? {}",
+            res.start,
+            res.pages,
+            self.get_name(),
+            conversions::chunk_align_down(res.start),
+            res.new_chunk
+        );
+        let bytes = conversions::pages_to_bytes(res.pages);
+
+        let mmap = || {
+            // Mmap the pages and the side metadata, and handle error. In case of any error,
+            // we will either call back to the VM for OOM, or simply panic.
+            if let Err(mmap_error) = self
+                .common()
+                .mmapper
+                .ensure_mapped(
+                    res.start,
+                    res.pages,
+                    self.common().mmap_strategy(),
+                    &memory::MmapAnnotation::Space {
+                        name: self.get_name(),
+                    },
+                )
+                .and(self.common().metadata.try_map_metadata_space(
+                    res.start,
+                    bytes,
+                    self.get_name(),
+                ))
+            {
+                memory::handle_mmap_error::<VM>(mmap_error, tls, res.start, bytes);
+            }
+        };
+        let grow_space = || {
+            self.grow_space(res.start, bytes, res.new_chunk);
+        };
 
-            // Return zero -- the caller will handle re-attempting allocation
-            Address::ZERO
+        // The scope of the lock is important in terms of performance when we have many allocator threads.
+        if SFT_MAP.get_side_metadata().is_some() {
+            // If the SFT map uses side metadata, we have to initialize the side metadata first.
+            mmap();
+            // then grow space, which will use the side metadata we mapped above
+            grow_space();
+            // then we can drop the lock after grow_space()
+            drop(lock);
         } else {
-            debug!("Collection not required");
+            // In normal cases, we can drop the lock immediately after grow_space()
+            grow_space();
+            drop(lock);
+            // and map side metadata without holding the lock
+            mmap();
+        }
 
-            // We need this lock: Othrewise, it is possible that one thread acquires pages in a new chunk, but not yet
-            // set SFT for it (in grow_space()), and another thread acquires pages in the same chunk, which is not
-            // a new chunk so grow_space() won't be called on it. The second thread could return a result in the chunk before
-            // its SFT is properly set.
-            // We need to minimize the scope of this lock for performance when we have many threads (mutator threads, or GC threads with copying allocators).
-            // See: https://github.com/mmtk/mmtk-core/issues/610
-            let lock = self.common().acquire_lock.lock().unwrap();
-
-            match pr.get_new_pages(self.common().descriptor, pages_reserved, pages, tls) {
-                Ok(res) => {
-                    debug!(
-                        "Got new pages {} ({} pages) for {} in chunk {}, new_chunk? {}",
-                        res.start,
-                        res.pages,
-                        self.get_name(),
-                        conversions::chunk_align_down(res.start),
-                        res.new_chunk
-                    );
-                    let bytes = conversions::pages_to_bytes(res.pages);
-
-                    let mmap = || {
-                        // Mmap the pages and the side metadata, and handle error. In case of any error,
-                        // we will either call back to the VM for OOM, or simply panic.
-                        if let Err(mmap_error) = self
-                            .common()
-                            .mmapper
-                            .ensure_mapped(
-                                res.start,
-                                res.pages,
-                                self.common().mmap_strategy(),
-                                &memory::MmapAnnotation::Space {
-                                    name: self.get_name(),
-                                },
-                            )
-                            .and(self.common().metadata.try_map_metadata_space(
-                                res.start,
-                                bytes,
-                                self.get_name(),
-                            ))
-                        {
-                            memory::handle_mmap_error::<VM>(mmap_error, tls, res.start, bytes);
-                        }
-                    };
-                    let grow_space = || {
-                        self.grow_space(res.start, bytes, res.new_chunk);
-                    };
-
-                    // The scope of the lock is important in terms of performance when we have many allocator threads.
-                    if SFT_MAP.get_side_metadata().is_some() {
-                        // If the SFT map uses side metadata, so we have to initialize side metadata first.
-                        mmap();
-                        // then grow space, which will use the side metadata we mapped above
-                        grow_space();
-                        // then we can drop the lock after grow_space()
-                        drop(lock);
-                    } else {
-                        // In normal cases, we can drop lock immediately after grow_space()
-                        grow_space();
-                        drop(lock);
-                        // and map side metadata without holding the lock
-                        mmap();
-                    }
-
-                    // TODO: Concurrent zeroing
-                    if self.common().zeroed {
-                        memory::zero(res.start, bytes);
-                    }
-
-                    // Some assertions
-                    {
-                        // --- Assert the start of the allocated region ---
-                        // The start address SFT should be correct.
-                        debug_assert_eq!(SFT_MAP.get_checked(res.start).name(), self.get_name());
-                        // The start address is in our space.
-                        debug_assert!(self.address_in_space(res.start));
-                        // The descriptor should be correct.
-                        debug_assert_eq!(
-                            self.common().vm_map().get_descriptor_for_address(res.start),
-                            self.common().descriptor
-                        );
-
-                        // --- Assert the last byte in the allocated region ---
-                        let last_byte = res.start + bytes - 1;
-                        // The SFT for the last byte in the allocated memory should be correct.
-                        debug_assert_eq!(SFT_MAP.get_checked(last_byte).name(), self.get_name());
-                        // The last byte in the allocated memory should be in this space.
-                        debug_assert!(self.address_in_space(last_byte));
-                        // The descriptor for the last byte should be correct.
-                        debug_assert_eq!(
-                            self.common().vm_map().get_descriptor_for_address(last_byte),
-                            self.common().descriptor
-                        );
-                    }
-
-                    debug!("Space.acquire(), returned = {}", res.start);
-                    res.start
-                }
-                Err(_) => {
-                    drop(lock); // drop the lock immediately
-
-                    // Clear the request
-                    pr.clear_request(pages_reserved);
-
-                    // If we do not want GC on fail, just return zero.
-                    if !alloc_options.on_fail.allow_gc() {
-                        return Address::ZERO;
-                    }
-
-                    // We thought we had memory to allocate, but somehow failed the allocation. Will force a GC.
-                    assert!(
-                        allow_gc,
-                        "Physical allocation failed when GC is not allowed!"
-                    );
-
-                    let gc_performed = self.get_gc_trigger().poll(true, Some(self.as_space()));
-                    debug_assert!(gc_performed, "GC not performed when forced.");
-
-                    // Inform GC trigger about the pending allocation.
-                    self.get_gc_trigger()
-                        .policy
-                        .on_pending_allocation(pages_reserved);
-
-                    VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We asserted that this is mutator.
-                    Address::ZERO
-                }
-            }
+        // TODO: Concurrent zeroing
+        if self.common().zeroed {
+            memory::zero(res.start, bytes);
+        }
+
+        // Some assertions
+        {
+            // --- Assert the start of the allocated region ---
+            // The start address SFT should be correct.
+            debug_assert_eq!(SFT_MAP.get_checked(res.start).name(), self.get_name());
+            // The start address is in our space.
+            debug_assert!(self.address_in_space(res.start));
+            // The descriptor should be correct.
+            debug_assert_eq!(
+                self.common().vm_map().get_descriptor_for_address(res.start),
+                self.common().descriptor
+            );
+
+            // --- Assert the last byte in the allocated region ---
+            let last_byte = res.start + bytes - 1;
+            // The SFT for the last byte in the allocated memory should be correct.
+            debug_assert_eq!(SFT_MAP.get_checked(last_byte).name(), self.get_name());
+            // The last byte in the allocated memory should be in this space.
+            debug_assert!(self.address_in_space(last_byte));
+            // The descriptor for the last byte should be correct.
+            debug_assert_eq!(
+                self.common().vm_map().get_descriptor_for_address(last_byte),
+                self.common().descriptor
+            );
         }
+
+        debug!("Space.acquire(), returned = {}", res.start);
+        Some(res.start)
+    }
+
+    /// Handle the case where [`Space::acquire`] will not or cannot acquire pages from the page
+    /// resource. This may happen when
+    /// - GC is triggered and the allocation does not allow over-committing, or
+    /// - the allocation tried to acquire pages from the page resource but ran out of physical
+    ///   memory.
+    fn not_acquiring(
+        &self,
+        tls: VMThread,
+        alloc_options: AllocationOptions,
+        pr: &dyn PageResource<VM>,
+        pages_reserved: usize,
+        attempted_allocation_and_failed: bool,
+    ) {
+        // Clear the request
+        pr.clear_request(pages_reserved);
+
+        // If we do not want GC on fail, just return.
+        if !alloc_options.on_fail.allow_gc() {
+            return;
+        }
+
+        debug!("Collection required");
+
+        if !self.common().global_state.is_initialized() {
+            // Otherwise do GC here
+            panic!(
+                "GC is not allowed here: collection is not initialized \
+                 (did you call initialize_collection()?). \
+                 Out of physical memory: {phy}",
+                phy = attempted_allocation_and_failed
+            );
+        }
+
+        if attempted_allocation_and_failed {
+            // We thought we had memory to allocate, but somehow failed the allocation. Will force a GC.
+            let gc_performed = self.get_gc_trigger().poll(true, Some(self.as_space()));
+            debug_assert!(gc_performed, "GC not performed when forced.");
+        }
+
+        // Inform GC trigger about the pending allocation.
+        let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved);
+        let total_pages_reserved = pages_reserved + meta_pages_reserved;
+        self.get_gc_trigger()
+            .policy
+            .on_pending_allocation(total_pages_reserved);
+
+        VM::VMCollection::block_for_gc(VMMutatorThread(tls)); // We have checked that this is mutator
     }
 
     fn address_in_space(&self, start: Address) -> bool {
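The second bug fix called out in the commit message is visible at the end of `not_acquiring` above: the old out-of-physical-memory path passed only `pages_reserved` to `on_pending_allocation`, while the GC-triggered path also added the estimated side-metadata pages. Both failure paths now share the same accounting, roughly:

// Condensed from `not_acquiring` above; both failure paths now report
// data pages plus the estimated side-metadata pages to the GC trigger.
let meta_pages_reserved = self.estimate_side_meta_pages(pages_reserved);
let total_pages_reserved = pages_reserved + meta_pages_reserved;
self.get_gc_trigger()
    .policy
    .on_pending_allocation(total_pages_reserved);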
