Skip to content

Deadlock on destruction #334

@ktf

Description

@ktf

I think I see some threading related deadlock between:

#0  __lll_lock_wait () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
#1  0x00007f241030a023 in __GI___pthread_mutex_lock (mutex=0x7f2414d19010) at ../nptl/pthread_mutex_lock.c:78
#2  0x00007f240850e1db in boost::interprocess::ipcdetail::posix_mutex::lock (this=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/posix/mutex.hpp:96
#3  0x00007f240850e260 in boost::interprocess::ipcdetail::timeout_when_locking_aware_lock<boost::interprocess::ipcdetail::posix_mutex> (m=...)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/common_algorithms.hpp:89
#4  boost::interprocess::interprocess_mutex::lock (this=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/interprocess_mutex.hpp:160
#5  boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex>::scoped_lock (m=..., this=<synthetic pointer>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/scoped_lock.hpp:81
#6  boost::interprocess::ipcdetail::condition_any_algorithm<boost::interprocess::ipcdetail::shm_named_condition::internal_condition_members>::signal (
    data=..., broadcast=broadcast@entry=true)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/condition_any_algorithm.hpp:87
#7  0x00007f240850e54a in boost::interprocess::ipcdetail::condition_any_wrapper<boost::interprocess::ipcdetail::shm_named_condition::internal_condition_members>::notify_all (this=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/condition_any_algorithm.hpp:177
#8  boost::interprocess::ipcdetail::shm_named_condition::notify_all (this=0x4c37248)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/shm/named_condition.hpp:208
#9  boost::interprocess::named_condition::notify_all (this=0x4c37248)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/named_condition.hpp:157
#10 fair::mq::shmem::Manager::UnsubscribeFromRegionEvents (this=0x4c37190)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/shmem/Manager.h:405
#11 0x00007f2408540e29 in fair::mq::shmem::Manager::~Manager (this=0x4c37190, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/shmem/Manager.h:576
#12 0x00007f24085415f5 in std::default_delete<fair::mq::shmem::Manager>::operator() (this=<optimized out>, __ptr=0x4c37190)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/unique_ptr.h:78
#13 std::unique_ptr<fair::mq::shmem::Manager, std::default_delete<fair::mq::shmem::Manager> >::~unique_ptr (this=0x4c0dd58, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/unique_ptr.h:268
#14 fair::mq::shmem::TransportFactory::~TransportFactory (this=0x4c0dcc0, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/shmem/TransportFactory.h:182
#15 0x000000000042a8ea in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0x4c0dcb0)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/shared_ptr_base.h:154
#16 0x00007f24084f3712 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=<optimized out>, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/shared_ptr_base.h:684
#17 std::__shared_ptr<FairMQTransportFactory, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (this=<optimized out>, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/shared_ptr_base.h:1123
#18 std::shared_ptr<FairMQTransportFactory>::~shared_ptr (this=<optimized out>, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/shared_ptr.h:93
#19 std::pair<fair::mq::Transport const, std::shared_ptr<FairMQTransportFactory> >::~pair (this=<optimized out>, __in_chrg=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/bits/stl_pair.h:198

and

#0  0x00007f241030df85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f2228b3cf90, expected=0, futex_word=0x4c37388)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1  __pthread_cond_wait_common (abstime=0x7f2228b3cf90, mutex=0x4c37338, cond=0x4c37360) at pthread_cond_wait.c:539
#2  __pthread_cond_timedwait (cond=0x4c37360, mutex=0x4c37338, abstime=0x7f2228b3cf90) at pthread_cond_wait.c:667
#3  0x00007f2408542dec in __gthread_cond_timedwait (__abs_timeout=0x7f2228b3cf90, __mutex=<optimized out>, __cond=0x4c37360)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/x86_64-unknown-linux-gnu/bits/gthr-default.h:871
#4  std::condition_variable::__wait_until_impl<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__atime=..., __lock=..., this=0x4c37360)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:166
#5  std::condition_variable::wait_until<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__atime=..., __lock=..., this=0x4c37360)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:106
#6  std::condition_variable::wait_until<std::chrono::_V2::system_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, fair::mq::shmem::Manager::SendHeartbeats()::{lambda()#2}>(std::unique_lock<std::mutex>&, std::chrono::time_point<std::chrono::_V2::system_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > const&, fair::mq::shmem::Manager::SendHeartbeats()::{lambda()#2}) (__p=..., __atime=..., __lock=..., this=0x4c37360)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:129
#7  std::condition_variable::wait_for<long, std::ratio<1l, 1000l>, fair::mq::shmem::Manager::SendHeartbeats()::{lambda()#2}>(std::unique_lock<std::mutex>&, std::chrono::duration<long, std::ratio<1l, 1000l> > const&, fair::mq::shmem::Manager::SendHeartbeats()::{lambda()#2}) (__p=..., __rtime=..., __lock=...,
    this=0x4c37360) at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:145
#8  fair::mq::shmem::Manager::SendHeartbeats (this=0x4c37190) at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/shmem/Manager.h:461
#9  0x00007f24047706df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#10 0x00007f24103076db in start_thread (arg=0x7f2228b43700) at pthread_create.c:463
#11 0x00007f2403e2da3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

The other threads are doing:

#0  0x00007f241030d9f3 in futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x7f2414d19060)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:88
#1  __pthread_cond_wait_common (abstime=0x0, mutex=0x7f2414d19010, cond=0x7f2414d19038) at pthread_cond_wait.c:502
#2  __pthread_cond_wait (cond=cond@entry=0x7f2414d19038, mutex=mutex@entry=0x7f2414d19010) at pthread_cond_wait.c:655
#3  0x00007f240851394f in boost::interprocess::ipcdetail::posix_condition::do_wait (mut=..., this=0x7f2414d19038)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/posix/condition.hpp:174
#4  boost::interprocess::ipcdetail::posix_condition::wait<boost::interprocess::ipcdetail::internal_mutex_lock<boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> > > (lock=<synthetic pointer>..., this=0x7f2414d19038)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/posix/condition.hpp:67
#5  boost::interprocess::interprocess_condition::wait<boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> > (
    lock=<synthetic pointer>..., this=<optimized out>)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/interprocess_condition.hpp:107
#6  boost::interprocess::ipcdetail::condition_any_algorithm<boost::interprocess::ipcdetail::shm_named_condition::internal_condition_members>::do_wait<boost::interprocess::ipcdetail::internal_mutex_lock<boost::interprocess::scoped_lock<boost::interprocess::named_mutex> > > (data=..., lock=...)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/condition_any_algorithm.hpp:126
#7  0x00007f2408539e50 in boost::interprocess::ipcdetail::condition_any_algorithm<boost::interprocess::ipcdetail::shm_named_condition::internal_condition_members>::wait<boost::interprocess::ipcdetail::internal_mutex_lock<boost::interprocess::scoped_lock<boost::interprocess::named_mutex> > > (abs_time=...,
    tout_enabled=false, lock=..., data=...)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/condition_any_algorithm.hpp:108
#8  boost::interprocess::ipcdetail::condition_any_wrapper<boost::interprocess::ipcdetail::shm_named_condition::internal_condition_members>::wait<boost::interprocess::ipcdetail::internal_mutex_lock<boost::interprocess::scoped_lock<boost::interprocess::named_mutex> > > (lock=..., this=0x7f2414d19010)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/detail/condition_any_algorithm.hpp:184
#9  boost::interprocess::ipcdetail::shm_named_condition::wait<boost::interprocess::ipcdetail::internal_mutex_lock<boost::interprocess::scoped_lock<boost::interprocess::named_mutex> > > (lock=..., this=0x4c37248)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/shm/named_condition.hpp:212
#10 boost::interprocess::named_condition::wait<boost::interprocess::scoped_lock<boost::interprocess::named_mutex> > (lock=..., this=0x4c37248)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/boost/v1.74.0-1/include/boost/interprocess/sync/named_condition.hpp:163
#11 fair::mq::shmem::Manager::RegionEventsSubscription (this=0x4c37190)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/shmem/Manager.h:433
#12 0x00007f24047706df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#13 0x00007f24103076db in start_thread (arg=0x7f2227340700) at pthread_create.c:463
#14 0x00007f2403e2da3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
#0  0x00007f241030df85 in futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f23e920de30, expected=0, futex_word=0x4aa4a44)
    at ../sysdeps/unix/sysv/linux/futex-internal.h:205
#1  __pthread_cond_wait_common (abstime=0x7f23e920de30, mutex=0x4aa49f0, cond=0x4aa4a18) at pthread_cond_wait.c:539
#2  __pthread_cond_timedwait (cond=0x4aa4a18, mutex=0x4aa49f0, abstime=0x7f23e920de30) at pthread_cond_wait.c:667
#3  0x00007f24085d59c3 in __gthread_cond_timedwait (__abs_timeout=0x7f23e920de30, __mutex=<optimized out>, __cond=0x4aa4a18)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/x86_64-unknown-linux-gnu/bits/gthr-default.h:871
#4  std::condition_variable::__wait_until_impl<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__atime=..., __lock=..., this=0x4aa4a18)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:166
#5  std::condition_variable::wait_until<std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (__atime=..., __lock=..., this=0x4aa4a18)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:106
#6  std::condition_variable::wait_for<long, std::ratio<1l, 1000l> > (__rtime=..., __lock=..., this=0x4aa4a18)
    at /home/alitrain/eulisse/sw2/ubuntu1804_x86-64/GCC-Toolchain/v7.3.0-alice2-1/include/c++/7.3.0/condition_variable:138
#7  fair::mq::StateQueue::WaitForNext (this=this@entry=0x4aa49a0) at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/StateQueue.h:33
#8  0x00007f24085d25f8 in fair::mq::plugins::Control::RunShutdownSequence (this=this@entry=0x4aa48e0)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/plugins/Control.cxx:453
#9  0x00007f24085d4f20 in fair::mq::plugins::Control::StaticMode (this=0x4aa48e0)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/plugins/Control.cxx:383
#10 0x00007f24047706df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#11 0x00007f24103076db in start_thread (arg=0x7f23e9214700) at pthread_create.c:463
#12 0x00007f2403e2da3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
[Switching to thread 2 (Thread 0x7f23ea216700 (LWP 53118))]
#0  0x00007f2403df0a30 in __GI___nanosleep (requested_time=requested_time@entry=0x7f23ea2105e0, remaining=remaining@entry=0x0)
    at ../sysdeps/unix/sysv/linux/nanosleep.c:28
28      ../sysdeps/unix/sysv/linux/nanosleep.c: No such file or directory.
#0  0x00007f2410308d2d in __GI___pthread_timedjoin_ex (threadid=139792211855104, thread_return=0x0, abstime=0x0, block=<optimized out>)
    at pthread_join_common.c:89
#1  0x00007f2404770933 in std::thread::join() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#2  0x00007f24085d5494 in fair::mq::plugins::Control::SignalHandler (this=0x4aa48e0)
    at /home/alitrain/eulisse/sw2/SOURCES/FairMQ/v1.4.30/v1.4.30/fairmq/plugins/Control.cxx:405
#3  0x00007f24047706df in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#4  0x00007f24103076db in start_thread (arg=0x7f23e8a13700) at pthread_create.c:463
#5  0x00007f2403e2da3f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

Does it ring any bell? This is while shutting down a process...

Metadata

Metadata

Assignees

Labels

No labels
No labels

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions