Let Python stubs respect RUNFILES_* while finding the module space

When invoking a py_binary() through an sh_binary() using the Bash runfiles library, the location of the py_binary() will be resolved from the runfiles manifest file. This means that the argv[0] of the Python stub script will not point to a location under the runfiles directory of the shell script, but to a location in Bazel's execroot. Normally this does not lead to any issues, as argv[0] + ".runfiles" happens to point to be a valid runfiles directory as well. It does become problematic when --nobuild_runfile_links is provided, as in that case only the outer shell script is guaranteed to have a runfiles directory; not the inner Python script. This change extends the Python stub template to also consider RUNFILES_DIR when no runfiles directory can be found. Even though it's not technically correct, we also attempt to derive the runfiles directory from RUNFILES_MANIFEST_FILE. I suspect that this is a necessity as long as py_binary()s cannot operate purely using a manifest file. They currently depend on a concrete instantiation of the runfiles directory. Fixes: #11997
bazelbuild · Sep 26, 2022 · 86521ab · 86521ab
1 parent 66f4001
commit 86521ab
Show file tree

Hide file tree

Showing 3 changed files with 80 additions and 12 deletions.
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt b/src/main/java/com/google/devtools/build/lib/bazel/rules/python/python_stub_template.txt
@@ -129,8 +129,24 @@ def CreatePythonPathEntries(python_imports, module_space):
   parts = python_imports.split(':')
   return [module_space] + ['%s/%s' % (module_space, path) for path in parts]
 
-def FindModuleSpace():
+def FindModuleSpace(main_rel_path):
   """Finds the runfiles tree."""
+  # When the calling process used the runfiles manifest to resolve the
+  # location of this stub script, the path may be expanded. This means
+  # argv[0] may no longer point to a location inside the runfiles
+  # directory. We should therefore respect RUNFILES_DIR and
+  # RUNFILES_MANIFEST_FILE set by the caller.
+  runfiles_dir = os.environ.get('RUNFILES_DIR', None)
+  if not runfiles_dir:
+    runfiles_manifest_file = os.environ.get('RUNFILES_MANIFEST_FILE', '')
+    if (runfiles_manifest_file.endswith('.runfiles_manifest') or
+        runfiles_manifest_file.endswith('.runfiles/MANIFEST')):
+      runfiles_dir = runfiles_manifest_file[:-9]
+  # Be defensive: the runfiles dir should contain our main entry point. If
+  # it doesn't, then it must not be our runfiles directory.
+  if runfiles_dir and os.path.exists(os.path.join(runfiles_dir, main_rel_path)):
+    return runfiles_dir
+
   stub_filename = sys.argv[0]
   if not os.path.isabs(stub_filename):
     stub_filename = os.path.join(os.getcwd(), stub_filename)
@@ -413,10 +429,17 @@ def Main():
 
   new_env = {}
 
+  # The main Python source file.
+  # The magic string percent-main-percent is replaced with the runfiles-relative
+  # filename of the main file of the Python binary in BazelPythonSemantics.java.
+  main_rel_path = '%main%'
+  if IsWindows():
+    main_rel_path = main_rel_path.replace('/', os.sep)
+
   if IsRunningFromZip():
     module_space = CreateModuleSpace()
   else:
-    module_space = FindModuleSpace()
+    module_space = FindModuleSpace(main_rel_path)
 
   python_imports = '%imports%'
   python_path_entries = CreatePythonPathEntries(python_imports, module_space)
@@ -446,14 +469,7 @@ def Main():
   # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
   new_env['PYTHONSAFEPATH'] = '1'
 
-  # Now look for my main python source file.
-  # The magic string percent-main-percent is replaced with the filename of the
-  # main file of the Python binary in BazelPythonSemantics.java.
-  rel_path = '%main%'
-  if IsWindows():
-    rel_path = rel_path.replace('/', os.sep)
-
-  main_filename = os.path.join(module_space, rel_path)
+  main_filename = os.path.join(module_space, main_rel_path)
   main_filename = GetWindowsPathWithUNCPrefix(main_filename)
   assert os.path.exists(main_filename), \
          'Cannot exec() %r: file not found.' % main_filename

diff --git a/src/test/shell/bazel/bazel_example_test.sh b/src/test/shell/bazel/bazel_example_test.sh
@@ -151,15 +151,20 @@ function test_shell() {
 function test_python() {
   assert_build "//examples/py:bin"
 
-  ./bazel-bin/examples/py/bin >& $TEST_log \
+  # Don't invoke the Python binary with RUNFILES_* set, as that causes
+  # it to look in the runfiles directory of this test, instead of the
+  # one belonging to the Python binary.
+  env -u RUNFILES_DIR -u RUNFILES_MANIFEST_FILE \
+    ./bazel-bin/examples/py/bin >& $TEST_log \
     || fail "//examples/py:bin execution failed"
   expect_log "Fib(5)=8"
 
   # Mutate //examples/py:bin so that it needs to build again.
   echo "print('Hello')" > ./examples/py/bin.py
   # Ensure that we can rebuild //examples/py::bin without error.
   assert_build "//examples/py:bin"
-  ./bazel-bin/examples/py/bin >& $TEST_log \
+  env -u RUNFILES_DIR -u RUNFILES_MANIFEST_FILE \
+    ./bazel-bin/examples/py/bin >& $TEST_log \
     || fail "//examples/py:bin 2nd build execution failed"
   expect_log "Hello"
 }

diff --git a/src/test/shell/integration/python_stub_test.sh b/src/test/shell/integration/python_stub_test.sh
@@ -190,4 +190,51 @@ EOF
       &> $TEST_log || fail "bazel run failed"
 }
 
+# When invoking a Python binary using the runfiles manifest, the stub
+# script's argv[0] will point to a location in the execroot; not the
+# runfiles directory of the caller. The stub script should still be
+# capable of finding its runfiles directory by considering RUNFILES_DIR
+# and RUNFILES_MANIFEST_FILE set by the caller.
+function test_python_through_bash_without_runfile_links() {
+  mkdir -p python_through_bash
+
+  cat > python_through_bash/BUILD << EOF
+py_binary(
+    name = "inner",
+    srcs = ["inner.py"],
+)
+
+sh_binary(
+    name = "outer",
+    srcs = ["outer.sh"],
+    data = [":inner"],
+    deps = ["@bazel_tools//tools/bash/runfiles"],
+)
+EOF
+
+  cat > python_through_bash/outer.sh << EOF
+#!/bin/bash
+
+# --- begin runfiles.bash initialization v2 ---
+# Copy-pasted from the Bazel Bash runfiles library v2.
+set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
+source "\${RUNFILES_DIR:-/dev/null}/\$f" 2>/dev/null || \
+  source "\$(grep -sm1 "^\$f " "\${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
+  source "\$0.runfiles/\$f" 2>/dev/null || \
+  source "\$(grep -sm1 "^\$f " "\$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  source "\$(grep -sm1 "^\$f " "\$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
+  { echo>&2 "ERROR: cannot find \$f"; exit 1; }; f=; set -e
+# --- end runfiles.bash initialization v2 ---
+
+exec "\$(rlocation main/python_through_bash/inner${EXE_EXT})"
+EOF
+  chmod +x python_through_bash/outer.sh
+
+  touch python_through_bash/inner.py
+
+  bazel run --nobuild_runfile_links //python_through_bash:outer \
+      &> $TEST_log || fail "bazel run failed"
+  expect_log "I am Python"
+}
+
 run_suite "Tests for the Python rules without Python execution"