Skip to content

Trying to open buckets when there are pending coordinator packets #770

Closed
@wks

Description

@wks

I observed an error when running Ruby bootstrap tests in the debug mode. The following is a stack trace.

   panicked at 'GCWorker attempted to open buckets when there are pending coordinator work packets', /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:242:9
   [2023-03-07T08:09:40Z INFO  mmtk::scheduler::gc_work] End of GC (251/1276 pages, took 155 ms)
      0: mmtk_ruby::handle_gc_thread_panic
                at /home/wks/projects/mmtk-github/mmtk-ruby/mmtk/src/lib.rs:95:14
      1: mmtk_ruby::set_panic_hook::{{closure}}
                at /home/wks/projects/mmtk-github/mmtk-ruby/mmtk/src/lib.rs:124:13
      2: <alloc::boxed::Box<F,A> as core::ops::function::Fn<Args>>::call
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/alloc/src/boxed.rs:2032:9
      3: std::panicking::rust_panic_with_hook
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panicking.rs:692:13
      4: std::panicking::begin_panic_handler::{{closure}}
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panicking.rs:577:13
      5: std::sys_common::backtrace::__rust_end_short_backtrace
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/sys_common/backtrace.rs:137:18
      6: rust_begin_unwind
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panicking.rs:575:5
      7: core::panicking::panic_fmt
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/core/src/panicking.rs:64:14
      8: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::are_buckets_drained
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:242:9
      9: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::new::{{closure}}
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:85:29
     10: <alloc::boxed::Box<F,A> as core::ops::function::Fn<Args>>::call
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/alloc/src/boxed.rs:2032:9
     11: mmtk::scheduler::work_bucket::WorkBucket<VM>::update
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/work_bucket.rs:207:40
     12: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::update_buckets
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:280:33
     13: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::poll_slow
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:432:24
     14: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::poll::{{closure}}
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:402:32
     15: core::option::Option<T>::unwrap_or_else
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/core/src/option.rs:828:21
     16: mmtk::scheduler::scheduler::GCWorkScheduler<VM>::poll
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/scheduler.rs:401:9
     17: mmtk::scheduler::worker::GCWorker<VM>::poll::{{closure}}
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/worker.rs:175:32
     18: core::option::Option<T>::unwrap_or_else
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/core/src/option.rs:828:21
     19: mmtk::scheduler::worker::GCWorker<VM>::poll
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/worker.rs:171:9
     20: mmtk::scheduler::worker::GCWorker<VM>::run
                at /home/wks/projects/mmtk-github/mmtk-core/src/scheduler/worker.rs:190:28
     21: mmtk::memory_manager::start_worker
                at /home/wks/projects/mmtk-github/mmtk-core/src/memory_manager.rs:446:5
     22: <mmtk_ruby::collection::VMCollection as mmtk::vm::collection::Collection<mmtk_ruby::Ruby>>::spawn_gc_thread::{{closure}}
                at /home/wks/projects/mmtk-github/mmtk-ruby/mmtk/src/collection.rs:66:25
     23: std::sys_common::backtrace::__rust_begin_short_backtrace
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/sys_common/backtrace.rs:121:18
     24: std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/thread/mod.rs:550:17
     25: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/core/src/panic/unwind_safe.rs:271:9
     26: std::panicking::try::do_call
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panicking.rs:483:40
     27: __rust_try
     28: std::panicking::try
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panicking.rs:447:19
     29: std::panic::catch_unwind
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/panic.rs:137:14
     30: std::thread::Builder::spawn_unchecked_::{{closure}}
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/thread/mod.rs:549:30
     31: core::ops::function::FnOnce::call_once{{vtable.shim}}
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/core/src/ops/function.rs:507:5
     32: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/alloc/src/boxed.rs:2000:9
     33: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/alloc/src/boxed.rs:2000:9
     34: std::sys::unix::thread::Thread::new::thread_start
                at /rustc/d5a82bbd26e1ad8b7401f6a718a9c57c96905483/library/std/src/sys/unix/thread.rs:108:17
     35: <unknown>
     36: <unknown>

Frame 13 shows it is calling self.update_buckets() in poll_slow:

// GCWorkScheduler::poll_slow
                } else if self.pending_coordinator_packets.load(Ordering::SeqCst) == 0 {
                    // See if any bucket has a sentinel.
                    if self.schedule_sentinels() {
                        // We're not going to sleep since new work packets are just scheduled.
                        break 'polling_loop;
                    }
                    // Try to open new buckets.
                    if self.update_buckets() {  // **************HERE******************
                        // We're not going to sleep since a new bucket is just open.
                        break 'polling_loop;
                    }
                    debug_assert!(!self.worker_group.has_designated_work());
                    // The current pause is finished if we can't open more buckets.
                    worker.sender.send(CoordinatorMessage::Finish).unwrap();
                }

It tries to open each bucket. Each bucket calls are_buckets_drained on previous buckets to test if it should be opened:

    fn are_buckets_drained(&self, buckets: &[WorkBucketStage]) -> bool {
        debug_assert!(
            self.pending_coordinator_packets.load(Ordering::SeqCst) == 0,
            "GCWorker attempted to open buckets when there are pending coordinator work packets"
        );
        buckets.iter().all(|&b| self.work_buckets[b].is_drained())
    }

This is strange because the self.update_buckets() call is within the branch

} else if self.pending_coordinator_packets.load(Ordering::SeqCst) == 0 {

But the debug_assert failed. This means self.pending_coordinator_packets is changed somehow.

This error randomly occurs when running different test cases. I guess there are still some synchronisation errors somewhere.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions