Skip to content

Commit 9b154ab

Browse files
authored
gh-83069: Use efficient event-driven subprocess.Popen.wait() on Linux / macOS / BSD (#144047)
1 parent 8fe8a94 commit 9b154ab

File tree

5 files changed

+301
-5
lines changed

5 files changed

+301
-5
lines changed

Doc/library/subprocess.rst

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -803,14 +803,29 @@ Instances of the :class:`Popen` class have the following methods:
803803

804804
.. note::
805805

806-
When the ``timeout`` parameter is not ``None``, then (on POSIX) the
807-
function is implemented using a busy loop (non-blocking call and short
808-
sleeps). Use the :mod:`asyncio` module for an asynchronous wait: see
806+
When ``timeout`` is not ``None`` and the platform supports it, an
807+
efficient event-driven mechanism is used to wait for process termination:
808+
809+
- Linux >= 5.3 uses :func:`os.pidfd_open` + :func:`select.poll`
810+
- macOS and other BSD variants use :func:`select.kqueue` +
811+
``KQ_FILTER_PROC`` + ``KQ_NOTE_EXIT``
812+
- Windows uses ``WaitForSingleObject``
813+
814+
If none of these mechanisms are available, the function falls back to a
815+
busy loop (non-blocking call and short sleeps).
816+
817+
.. note::
818+
819+
Use the :mod:`asyncio` module for an asynchronous wait: see
809820
:class:`asyncio.create_subprocess_exec`.
810821

811822
.. versionchanged:: 3.3
812823
*timeout* was added.
813824

825+
.. versionchanged:: 3.15
826+
If *timeout* is not ``None``, use an efficient event-driven implementation
827+
on Linux >= 5.3 and macOS / BSD.
828+
814829
.. method:: Popen.communicate(input=None, timeout=None)
815830

816831
Interact with process: Send data to stdin. Read data from stdout and stderr,

Doc/whatsnew/3.15.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,20 @@ ssl
743743

744744
(Contributed by Ron Frederick in :gh:`138252`.)
745745

746+
subprocess
747+
----------
748+
749+
* :meth:`subprocess.Popen.wait`: when ``timeout`` is not ``None`` and the
750+
platform supports it, an efficient event-driven mechanism is used to wait for
751+
process termination:
752+
753+
- Linux >= 5.3 uses :func:`os.pidfd_open` + :func:`select.poll`.
754+
- macOS and other BSD variants use :func:`select.kqueue` + ``KQ_FILTER_PROC`` + ``KQ_NOTE_EXIT``.
755+
- Windows keeps using ``WaitForSingleObject`` (unchanged).
756+
757+
If none of these mechanisms are available, the function falls back to the
758+
traditional busy loop (non-blocking call and short sleeps).
759+
(Contributed by Giampaolo Rodola in :gh:`83069`.)
746760

747761
symtable
748762
--------

Lib/subprocess.py

Lines changed: 143 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,60 @@ def _use_posix_spawn():
748748
return False
749749

750750

751+
def _can_use_pidfd_open():
    """Return True if os.pidfd_open() appears usable in this process.

    Availability: Linux >= 5.3.  The probe opens a pidfd for the current
    process and closes it again right away, so it also notices the case
    where the function exists but the call is refused at runtime.
    """
    if not hasattr(os, "pidfd_open"):
        return False
    try:
        pidfd = os.pidfd_open(os.getpid(), 0)
    except OSError as err:
        # EMFILE / ENFILE: transitory 'too many open files', so the
        # mechanism is still considered usable.  Any other errno means
        # the call is likely blocked by a security policy like SECCOMP
        # (EPERM, EACCES, ENOSYS).
        return err.errno in {errno.EMFILE, errno.ENFILE}
    os.close(pidfd)
    return True
767+
768+
769+
def _can_use_kqueue():
    """Return True if kqueue() can be used to watch a process for exit.

    Availability: macOS, BSD.  Besides checking that the ``select``
    module exposes the required names, the probe registers a one-shot
    ``KQ_NOTE_EXIT`` event for the current process to confirm that the
    registration is accepted at runtime.
    """
    required = (
        "kqueue",
        "KQ_EV_ADD",
        "KQ_EV_ONESHOT",
        "KQ_FILTER_PROC",
        "KQ_NOTE_EXIT",
    )
    if not all(hasattr(select, name) for name in required):
        return False

    # transitory 'too many open files' errors do not mean the mechanism
    # is unsupported.
    transient = {errno.EMFILE, errno.ENFILE}
    try:
        kq = select.kqueue()
    except OSError as err:
        return err.errno in transient

    try:
        kev = select.kevent(
            os.getpid(),
            filter=select.KQ_FILTER_PROC,
            flags=select.KQ_EV_ADD | select.KQ_EV_ONESHOT,
            fflags=select.KQ_NOTE_EXIT,
        )
        # Zero timeout: only register the event, do not wait for it.
        kq.control([kev], 1, 0)
    except OSError as err:
        return err.errno in transient
    else:
        return True
    finally:
        kq.close()
799+
800+
801+
# Probed once, when this module is imported.  When ``_mswindows`` is true
# the ``and`` short-circuits, so the probe functions are never called and
# both flags are False.
_CAN_USE_PIDFD_OPEN = not _mswindows and _can_use_pidfd_open()
_CAN_USE_KQUEUE = not _mswindows and _can_use_kqueue()
803+
804+
751805
# These are primarily fail-safe knobs for negatives. A True value does not
752806
# guarantee the given libc/syscall API will be used.
753807
_USE_POSIX_SPAWN = _use_posix_spawn()
@@ -2046,14 +2100,100 @@ def _try_wait(self, wait_flags):
20462100
sts = 0
20472101
return (pid, sts)
20482102

2103+
def _wait_pidfd(self, timeout):
    """Wait for PID to terminate using pidfd_open() + poll().

    Linux >= 5.3 only.

    Return True if poll() reported an event on the process file
    descriptor, or False if this mechanism cannot be used for this call
    (the caller is expected to fall back to another strategy).  Raise
    TimeoutExpired if *timeout* seconds pass without an event.
    """
    if not _CAN_USE_PIDFD_OPEN:
        return False
    try:
        pidfd = os.pidfd_open(self.pid, 0)
    except OSError:
        # May be:
        # - ESRCH: no such process
        # - EMFILE, ENFILE: too many open files (usually 1024)
        # - ENODEV: anonymous inode filesystem not supported
        # - EPERM, EACCES, ENOSYS: undocumented; may happen if
        #   blocked by security policy like SECCOMP
        return False

    try:
        poller = select.poll()
        poller.register(pidfd, select.POLLIN)
        try:
            # poll() expects milliseconds.
            events = poller.poll(timeout * 1000)
        except (OverflowError, ValueError):
            # The timeout cannot be represented by poll() (e.g. infinity
            # or a value too large for its millisecond counter).  Such
            # values were accepted before the event-driven wait existed,
            # so decline here instead of raising.
            return False
        if not events:
            raise TimeoutExpired(self.args, timeout)
        return True
    finally:
        os.close(pidfd)
2129+
2130+
def _wait_kqueue(self, timeout):
    """Wait for PID to terminate using kqueue(). macOS and BSD only.

    Return True if kqueue reported an event for the process, or False if
    this mechanism cannot be used for this call (the caller is expected
    to fall back to another strategy).  Raise TimeoutExpired if
    *timeout* seconds pass without an event.
    """
    if not _CAN_USE_KQUEUE:
        return False
    try:
        kq = select.kqueue()
    except OSError:
        # likely EMFILE / ENFILE (too many open files)
        return False

    try:
        kev = select.kevent(
            self.pid,
            filter=select.KQ_FILTER_PROC,
            flags=select.KQ_EV_ADD | select.KQ_EV_ONESHOT,
            fflags=select.KQ_NOTE_EXIT,
        )
        try:
            events = kq.control([kev], 1, timeout)  # wait
        except (OSError, OverflowError, ValueError):
            # OSError: the event could not be registered or waited on.
            # OverflowError / ValueError: the timeout cannot be
            # represented by kqueue (e.g. infinity or a huge value).
            # Either way, decline instead of raising.
            return False
        if not events:
            raise TimeoutExpired(self.args, timeout)
        return True
    finally:
        kq.close()
20492157

20502158
def _wait(self, timeout):
2051-
"""Internal implementation of wait() on POSIX."""
2159+
"""Internal implementation of wait() on POSIX.
2160+
2161+
Uses efficient pidfd_open() + poll() on Linux or kqueue()
2162+
on macOS/BSD when available. Falls back to polling
2163+
waitpid(WNOHANG) otherwise.
2164+
"""
20522165
if self.returncode is not None:
20532166
return self.returncode
20542167

20552168
if timeout is not None:
2056-
endtime = _time() + timeout
2169+
if timeout < 0:
2170+
raise TimeoutExpired(self.args, timeout)
2171+
started = _time()
2172+
endtime = started + timeout
2173+
2174+
# Try efficient wait first.
2175+
if self._wait_pidfd(timeout) or self._wait_kqueue(timeout):
2176+
# Process is gone. At this point os.waitpid(pid, 0)
2177+
# will return immediately, but in very rare races
2178+
# the PID may have been reused.
2179+
# os.waitpid(pid, WNOHANG) ensures we attempt a
2180+
# non-blocking reap without blocking indefinitely.
2181+
with self._waitpid_lock:
2182+
if self.returncode is not None:
2183+
return self.returncode # Another thread waited.
2184+
(pid, sts) = self._try_wait(os.WNOHANG)
2185+
assert pid == self.pid or pid == 0
2186+
if pid == self.pid:
2187+
self._handle_exitstatus(sts)
2188+
return self.returncode
2189+
# os.waitpid(pid, WNOHANG) returned 0 instead
2190+
# of our PID, meaning PID has not yet exited,
2191+
# even though poll() / kqueue() said so. Very
2192+
# rare and mostly theoretical. Fallback to busy
2193+
# polling.
2194+
elapsed = _time() - started
2195+
endtime -= elapsed
2196+
20572197
# Enter a busy loop if we have a timeout. This busy loop was
20582198
# cribbed from Lib/threading.py in Thread.wait() at r71065.
20592199
delay = 0.0005 # 500 us -> initial delay of 1 ms
@@ -2085,6 +2225,7 @@ def _wait(self, timeout):
20852225
# http://bugs.python.org/issue14396.
20862226
if pid == self.pid:
20872227
self._handle_exitstatus(sts)
2228+
20882229
return self.returncode
20892230

20902231

Lib/test/test_subprocess.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,6 +1423,8 @@ def test_wait(self):
14231423
def test_wait_timeout(self):
14241424
p = subprocess.Popen([sys.executable,
14251425
"-c", "import time; time.sleep(0.3)"])
1426+
with self.assertRaises(subprocess.TimeoutExpired) as c:
1427+
p.wait(timeout=0)
14261428
with self.assertRaises(subprocess.TimeoutExpired) as c:
14271429
p.wait(timeout=0.0001)
14281430
self.assertIn("0.0001", str(c.exception)) # For coverage of __str__.
@@ -4094,5 +4096,122 @@ def test_broken_pipe_cleanup(self):
40944096
self.assertTrue(proc.stdin.closed)
40954097

40964098

4099+
4100+
class FastWaitTestCase(BaseTestCase):
    """Tests for efficient (pidfd_open() + poll() / kqueue()) process
    waiting in subprocess.Popen.wait().
    """
    CAN_USE_PIDFD_OPEN = subprocess._CAN_USE_PIDFD_OPEN
    CAN_USE_KQUEUE = subprocess._CAN_USE_KQUEUE
    # Child that outlives WAIT_TIMEOUT but still exits quickly.
    COMMAND = [sys.executable, "-c", "import time; time.sleep(0.3)"]
    WAIT_TIMEOUT = 0.0001  # 0.1 ms

    def assert_fast_waitpid_error(self, patch_point):
        # Emulate a case where pidfd_open() or kqueue() fails.
        # Busy-poll wait should be used as fallback.
        exc = OSError(errno.EMFILE, os.strerror(errno.EMFILE))
        with mock.patch(patch_point, side_effect=exc) as m:
            p = subprocess.Popen(self.COMMAND)
            with self.assertRaises(subprocess.TimeoutExpired):
                p.wait(self.WAIT_TIMEOUT)
            self.assertEqual(p.wait(timeout=support.SHORT_TIMEOUT), 0)
        self.assertTrue(m.called)

    @unittest.skipUnless(CAN_USE_PIDFD_OPEN, "needs pidfd_open()")
    def test_wait_pidfd_open_error(self):
        self.assert_fast_waitpid_error("os.pidfd_open")

    @unittest.skipUnless(CAN_USE_KQUEUE, "needs kqueue() for proc")
    def test_wait_kqueue_error(self):
        self.assert_fast_waitpid_error("select.kqueue")

    @unittest.skipUnless(CAN_USE_KQUEUE, "needs kqueue() for proc")
    def test_kqueue_control_error(self):
        # Emulate a case where kqueue.control() fails. Busy-poll wait
        # should be used as fallback.
        p = subprocess.Popen(self.COMMAND)
        kq_mock = mock.Mock()
        kq_mock.control.side_effect = OSError(
            errno.EPERM, os.strerror(errno.EPERM)
        )
        kq_mock.close = mock.Mock()

        with mock.patch("select.kqueue", return_value=kq_mock) as m:
            with self.assertRaises(subprocess.TimeoutExpired):
                p.wait(self.WAIT_TIMEOUT)
            self.assertEqual(p.wait(timeout=support.SHORT_TIMEOUT), 0)
        self.assertTrue(m.called)

    def assert_wait_race_condition(self, patch_target, real_func):
        # Call pidfd_open() / kqueue(), then terminate the process.
        # Make sure that the wait call (poll() / kqueue.control())
        # still works for a terminated PID.
        p = subprocess.Popen(self.COMMAND)

        def wrapper(*args, **kwargs):
            ret = real_func(*args, **kwargs)
            try:
                # Kill and reap the child behind Popen's back, after
                # the fast-wait primitive has been created.
                os.kill(p.pid, signal.SIGTERM)
                os.waitpid(p.pid, 0)
            except OSError:
                pass
            return ret

        with mock.patch(patch_target, side_effect=wrapper) as m:
            status = p.wait(timeout=support.SHORT_TIMEOUT)
        self.assertTrue(m.called)
        self.assertEqual(status, 0)

    @unittest.skipUnless(CAN_USE_PIDFD_OPEN, "needs pidfd_open()")
    def test_pidfd_open_race(self):
        self.assert_wait_race_condition("os.pidfd_open", os.pidfd_open)

    @unittest.skipUnless(CAN_USE_KQUEUE, "needs kqueue() for proc")
    def test_kqueue_race(self):
        self.assert_wait_race_condition("select.kqueue", select.kqueue)

    def assert_notification_without_immediate_reap(self, patch_target):
        # Verify fallback to busy polling when poll() / kqueue()
        # succeeds, but waitpid(pid, WNOHANG) returns (0, 0).
        ncalls = 0
        real_waitpid = os.waitpid

        def waitpid_wrapper(pid, flags):
            # Only the first call pretends the child is still running.
            nonlocal ncalls
            ncalls += 1
            if ncalls == 1:
                return (0, 0)
            return real_waitpid(pid, flags)

        patch_wait = mock.patch.object(
            subprocess.Popen, patch_target, return_value=True
        )
        patch_waitpid = mock.patch("os.waitpid", side_effect=waitpid_wrapper)
        with patch_wait as m1, patch_waitpid as m2:
            p = subprocess.Popen(self.COMMAND)
            with self.assertRaises(subprocess.TimeoutExpired):
                p.wait(self.WAIT_TIMEOUT)
            self.assertEqual(p.wait(timeout=support.SHORT_TIMEOUT), 0)
        self.assertTrue(m1.called)
        self.assertTrue(m2.called)

    @unittest.skipUnless(CAN_USE_PIDFD_OPEN, "needs pidfd_open()")
    def test_pidfd_open_notification_without_immediate_reap(self):
        self.assert_notification_without_immediate_reap("_wait_pidfd")

    @unittest.skipUnless(CAN_USE_KQUEUE, "needs kqueue() for proc")
    def test_kqueue_notification_without_immediate_reap(self):
        self.assert_notification_without_immediate_reap("_wait_kqueue")

    @unittest.skipUnless(
        CAN_USE_PIDFD_OPEN or CAN_USE_KQUEUE,
        "fast wait mechanism not available"
    )
    def test_fast_path_avoid_busy_loop(self):
        # assert that the busy loop is not called as long as the fast
        # wait is available
        with mock.patch('time.sleep') as m:
            p = subprocess.Popen(self.COMMAND)
            with self.assertRaises(subprocess.TimeoutExpired):
                p.wait(self.WAIT_TIMEOUT)
            self.assertEqual(p.wait(timeout=support.LONG_TIMEOUT), 0)
        self.assertFalse(m.called)
4215+
40974216
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
:meth:`subprocess.Popen.wait`: when ``timeout`` is not ``None``, an efficient
2+
event-driven mechanism now waits for process termination, if available. Linux
3+
>= 5.3 uses :func:`os.pidfd_open` + :func:`select.poll`. macOS and other BSD
4+
variants use :func:`select.kqueue` + ``KQ_FILTER_PROC`` + ``KQ_NOTE_EXIT``.
5+
Windows keeps using ``WaitForSingleObject`` (unchanged). If none of these
6+
mechanisms are available, the function falls back to the traditional busy loop
7+
(non-blocking call and short sleeps). Patch by Giampaolo Rodola.

0 commit comments

Comments
 (0)