Skip to content

Commit

Permalink
pythongh-117378: Fix multiprocessing forkserver preload sys.path inhe…
Browse files Browse the repository at this point in the history
…ritance. (pythonGH-126538)

pythongh-117378: Fix multiprocessing forkserver preload sys.path inheritance.

`sys.path` was not properly being sent from the parent process when launching
the multiprocessing forkserver process to preload imports.  This bug has been
there since the forkserver start method was introduced in Python 3.4.  It was
always _supposed_ to inherit `sys.path` the same way the spawn method does.

Observable behavior change: A `''` value in `sys.path` will now be replaced in
the forkserver's `sys.path` with an absolute pathname
`os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was
imported in the parent process as it already was when using the spawn start
method. **This will only be observable during forkserver preload imports**.

The code invoked before calling things in another process already correctly sets `sys.path`.
Which is likely why this went unnoticed for so long as a mere performance issue in
some configurations.

A workaround for the bug on impacted Pythons is to set PYTHONPATH in the
environment before multiprocessing's forkserver process was started. Not perfect
as that is then inherited by other children, etc, but likely good enough for many
people's purposes.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
  • Loading branch information
2 people authored and picnixz committed Nov 19, 2024
1 parent 3b07365 commit b40459f
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Lib/multiprocessing/forkserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ def ensure_running(self):
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
'''Run forkserver.'''
if preload:
if sys_path is not None:
sys.path[:] = sys_path
if '__main__' in preload and main_path is not None:
process.current_process()._inheriting = True
try:
Expand Down
78 changes: 78 additions & 0 deletions Lib/test/_test_multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sys
import os
import gc
import importlib
import errno
import functools
import signal
Expand All @@ -20,8 +21,10 @@
import socket
import random
import logging
import shutil
import subprocess
import struct
import tempfile
import operator
import pickle
import weakref
Expand Down Expand Up @@ -6397,6 +6400,81 @@ def test_atexit(self):
self.assertEqual(f.read(), 'deadbeef')


class _TestSpawnedSysPath(BaseTestCase):
"""Test that sys.path is setup in forkserver and spawn processes."""

ALLOWED_TYPES = ('processes',)

def setUp(self):
self._orig_sys_path = list(sys.path)
self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-")
self._mod_name = "unique_test_mod"
module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py")
with open(module_path, "w", encoding="utf-8") as mod:
mod.write("# A simple test module\n")
sys.path[:] = [p for p in sys.path if p] # remove any existing ""s
sys.path.insert(0, self._temp_dir)
sys.path.insert(0, "") # Replaced with an abspath in child.
try:
self._ctx_forkserver = multiprocessing.get_context("forkserver")
except ValueError:
self._ctx_forkserver = None
self._ctx_spawn = multiprocessing.get_context("spawn")

def tearDown(self):
sys.path[:] = self._orig_sys_path
shutil.rmtree(self._temp_dir, ignore_errors=True)

@staticmethod
def enq_imported_module_names(queue):
queue.put(tuple(sys.modules))

def test_forkserver_preload_imports_sys_path(self):
ctx = self._ctx_forkserver
if not ctx:
self.skipTest("requires forkserver start method.")
self.assertNotIn(self._mod_name, sys.modules)
multiprocessing.forkserver._forkserver._stop() # Must be fresh.
ctx.set_forkserver_preload(
["test.test_multiprocessing_forkserver", self._mod_name])
q = ctx.Queue()
proc = ctx.Process(target=self.enq_imported_module_names, args=(q,))
proc.start()
proc.join()
child_imported_modules = q.get()
q.close()
self.assertIn(self._mod_name, child_imported_modules)

@staticmethod
def enq_sys_path_and_import(queue, mod_name):
queue.put(sys.path)
try:
importlib.import_module(mod_name)
except ImportError as exc:
queue.put(exc)
else:
queue.put(None)

def test_child_sys_path(self):
for ctx in (self._ctx_spawn, self._ctx_forkserver):
if not ctx:
continue
with self.subTest(f"{ctx.get_start_method()} start method"):
q = ctx.Queue()
proc = ctx.Process(target=self.enq_sys_path_and_import,
args=(q, self._mod_name))
proc.start()
proc.join()
child_sys_path = q.get()
import_error = q.get()
q.close()
self.assertNotIn("", child_sys_path) # replaced by an abspath
self.assertIn(self._temp_dir, child_sys_path) # our addition
# ignore the first element, it is the absolute "" replacement
self.assertEqual(child_sys_path[1:], sys.path[1:])
self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}")


class MiscTestCase(unittest.TestCase):
def test__all__(self):
# Just make sure names in not_exported are excluded
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver
process to correctly inherit the parent's :data:`sys.path` during the importing
of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as
:data:`sys.path` is configured in workers before executing work items.

This bug caused some forkserver module preloading to silently fail to preload.
This manifested as a performance degration in child processes when the
``sys.path`` was required due to additional repeated work in every worker.

It could also have a side effect of ``""`` remaining in :data:`sys.path` during
forkserver preload imports instead of the absolute path from :func:`os.getcwd`
at multiprocessing import time used in the worker ``sys.path``.

Potentially leading to incorrect imports from the wrong location during
preload. We are unaware of that actually happening. The issue was discovered
by someone observing unexpected preload performance gains.

0 comments on commit b40459f

Please sign in to comment.