Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/executorlib/standalone/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,15 @@ def generate_slurm_command(
if slurm_cmd_args is not None and len(slurm_cmd_args) > 0:
command_prepend_lst += slurm_cmd_args
return command_prepend_lst


def set_current_directory_in_environment():
"""
Add the current directory to the PYTHONPATH to be able to access local Python modules.
"""
environment = os.environ
current_path = os.getcwd()
if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"]:
environment["PYTHONPATH"] = os.getcwd() + ":" + environment["PYTHONPATH"]
elif "PYTHONPATH" not in environment:
environment["PYTHONPATH"] = os.getcwd()
Comment on lines +167 to +176
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Bug: substring match on PYTHONPATH can cause false positives/negatives.

current_path not in environment["PYTHONPATH"] performs a substring check on the entire string, not a path-component check. For example, if PYTHONPATH="/home/user/projectX" and cwd="/home/user/project", the substring match succeeds and the directory is incorrectly skipped. Split by the separator to check membership correctly.

Also, Line 174 calls os.getcwd() again instead of reusing current_path.

Proposed fix
 def set_current_directory_in_environment():
     """
     Add the current directory to the PYTHONPATH to be able to access local Python modules.
     """
     environment = os.environ
     current_path = os.getcwd()
-    if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"]:
-        environment["PYTHONPATH"] = os.getcwd() + ":" + environment["PYTHONPATH"]
+    if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"].split(os.pathsep):
+        environment["PYTHONPATH"] = current_path + os.pathsep + environment["PYTHONPATH"]
     elif "PYTHONPATH" not in environment:
-        environment["PYTHONPATH"] = os.getcwd()
+        environment["PYTHONPATH"] = current_path
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def set_current_directory_in_environment():
"""
Add the current directory to the PYTHONPATH to be able to access local Python modules.
"""
environment = os.environ
current_path = os.getcwd()
if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"]:
environment["PYTHONPATH"] = os.getcwd() + ":" + environment["PYTHONPATH"]
elif "PYTHONPATH" not in environment:
environment["PYTHONPATH"] = os.getcwd()
def set_current_directory_in_environment():
"""
Add the current directory to the PYTHONPATH to be able to access local Python modules.
"""
environment = os.environ
current_path = os.getcwd()
if "PYTHONPATH" in environment and current_path not in environment["PYTHONPATH"].split(os.pathsep):
environment["PYTHONPATH"] = current_path + os.pathsep + environment["PYTHONPATH"]
elif "PYTHONPATH" not in environment:
environment["PYTHONPATH"] = current_path
🤖 Prompt for AI Agents
In `@src/executorlib/standalone/command.py` around lines 167 - 176, In
set_current_directory_in_environment(), avoid substring checks and redundant
os.getcwd() calls: get current_path once, split the existing PYTHONPATH using
os.pathsep into components and check membership against current_path (not via
"in" on the full string), and if missing, join by os.pathsep (preferably
prepending current_path + os.pathsep + existing if existing non-empty); also
reuse current_path instead of calling os.getcwd() again and assign the new
string back to environment["PYTHONPATH"].

2 changes: 2 additions & 0 deletions src/executorlib/task_scheduler/file/spawner_pysqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pysqa import QueueAdapter

from executorlib.standalone.command import set_current_directory_in_environment
from executorlib.standalone.hdf import dump, get_queue_id
from executorlib.standalone.inputcheck import check_file_exists
from executorlib.standalone.scheduler import pysqa_execute_command, terminate_with_pysqa
Expand Down Expand Up @@ -85,6 +86,7 @@ def execute_with_pysqa(
os.path.dirname(os.path.abspath(cwd))
)
submit_kwargs.update(resource_dict)
set_current_directory_in_environment()
queue_id = qa.submit_job(**submit_kwargs)
Comment on lines +89 to 90
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, find the set_current_directory_in_environment function
fd -t f -e py | xargs grep -l "set_current_directory_in_environment" | head -5

Repository: pyiron/executorlib

Length of output: 210


🏁 Script executed:

# Also check the spawner_pysqa.py file around lines 89-90
cat -n src/executorlib/task_scheduler/file/spawner_pysqa.py | sed -n '85,95p'

Repository: pyiron/executorlib

Length of output: 525


🏁 Script executed:

# Check how set_current_directory_in_environment is implemented
rg -A10 "def set_current_directory_in_environment" --type py

Repository: pyiron/executorlib

Length of output: 910


🏁 Script executed:

# Check PySQA's QueueAdapter.submit_job to see if it uses os.environ
python3 <<'EOF'
try:
    import inspect
    import pysqa
    sig = inspect.signature(pysqa.QueueAdapter.submit_job)
    print("submit_job signature:", sig)
    # Try to get source if available
    try:
        src = inspect.getsource(pysqa.QueueAdapter.submit_job)
        print("\nSource (first 50 lines):")
        print('\n'.join(src.split('\n')[:50]))
    except:
        print("Could not retrieve source")
except ImportError:
    print("PySQA not installed, checking online docs...")
EOF

Repository: pyiron/executorlib

Length of output: 106


🏁 Script executed:

# Check the broader context in spawner_pysqa.py to understand qa object
cat -n src/executorlib/task_scheduler/file/spawner_pysqa.py | sed -n '1,100p'

Repository: pyiron/executorlib

Length of output: 4675


🏁 Script executed:

# Find pysqa_execute_command implementation
rg -A20 "def pysqa_execute_command" --type py

Repository: pyiron/executorlib

Length of output: 1941


🏁 Script executed:

# Compare with subprocess spawner to see how it handles environment
cat -n src/executorlib/task_scheduler/file/spawner_subprocess.py | sed -n '1,100p'

Repository: pyiron/executorlib

Length of output: 3731


🏁 Script executed:

# Check if there's any environment handling in spawner_subprocess
rg -n "set_current_directory_in_environment\|environ" src/executorlib/task_scheduler/file/spawner_subprocess.py

Repository: pyiron/executorlib

Length of output: 44


🏁 Script executed:

# Check PySQA documentation or examples in repo for environment handling
rg -n "submit_job\|environment\|environ" src/executorlib --type py -C2 | grep -A2 -B2 "submit_job"

Repository: pyiron/executorlib

Length of output: 44


🏁 Script executed:

# Get the full pysqa_execute_command function
rg -A40 "def pysqa_execute_command" src/executorlib/standalone/scheduler.py

Repository: pyiron/executorlib

Length of output: 1488


🏁 Script executed:

# Check if PySQA's QueueAdapter has any environment-related parameters
rg -n "environment\|env\|environ" src/executorlib --type py | grep -i pysqa

Repository: pyiron/executorlib

Length of output: 44


🏁 Script executed:

# Search for any usage of environment variable passing with PySQA in the codebase
rg -B5 -A10 "submit_job" src/executorlib/task_scheduler/file/spawner_pysqa.py

Repository: pyiron/executorlib

Length of output: 564


🏁 Script executed:

# Check if there are any tests or examples showing environment handling
fd -e py -path "*test*" | xargs grep -l "execute_with_pysqa\|submit_job" 2>/dev/null | head -3

Repository: pyiron/executorlib

Length of output: 230


🏁 Script executed:

# Check the actual command being submitted - does it include environment setup?
cat -n src/executorlib/task_scheduler/file/spawner_pysqa.py | sed -n '66,70p'

Repository: pyiron/executorlib

Length of output: 302


🏁 Script executed:

# Look for how environment variables might be passed in the command string
rg -B5 -A5 "\"command\":" src/executorlib/task_scheduler/file/spawner_pysqa.py

Repository: pyiron/executorlib

Length of output: 500


🏁 Script executed:

# Check if pysqa documentation is in the repo or referenced
find . -name "*.md" -o -name "*.rst" | xargs grep -l "pysqa\|PySQA" 2>/dev/null | head -3

Repository: pyiron/executorlib

Length of output: 123


🏁 Script executed:

# Get more context on how the command is prepared - especially check if it includes env setup
cat -n src/executorlib/standalone/scheduler.py | head -50

Repository: pyiron/executorlib

Length of output: 2239


🏁 Script executed:

# Check if there's any documentation about how subprocess inherits environment in pysqa_execute_command
cat -n src/executorlib/standalone/scheduler.py | sed -n '1,80p'

Repository: pyiron/executorlib

Length of output: 2841


🏁 Script executed:

# Check if there's any documentation about PySQA environment parameter
cat README.md | grep -A5 -B5 -i "environment\|pysqa" | head -30

Repository: pyiron/executorlib

Length of output: 2344


🏁 Script executed:

# Look for any environment setup in the command being submitted
rg -n "export\|PYTHONPATH" src/executorlib --type py | head -20

Repository: pyiron/executorlib

Length of output: 44


🏁 Script executed:

# Check if the command itself includes any environment setup
cat -n src/executorlib/standalone/command.py

Repository: pyiron/executorlib

Length of output: 8077


🏁 Script executed:

# Check how the command is constructed and if there's any environment export mechanism
cat -n src/executorlib/task_scheduler/file/spawner_pysqa.py | sed -n '40,70p'

Repository: pyiron/executorlib

Length of output: 1509


🏁 Script executed:

# Look for how pysqa_execute_command is used - specifically if it runs sbatch or similar
rg -B3 -A3 "sbatch\|submit" src/executorlib/standalone/scheduler.py

Repository: pyiron/executorlib

Length of output: 44


🏁 Script executed:

# Check if QueueAdapter or pysqa_execute_command has any env parameter documentation
python3 << 'EOF'
import subprocess
import inspect

# Test if subprocess.check_output inherits parent env when env is not specified
help_text = """
When env is not specified in subprocess calls, the child process inherits
the parent process's os.environ. However, for HPC schedulers like SLURM,
the job submitted via sbatch runs in a separate compute environment and
does NOT automatically inherit the submitting shell's environment variables.
"""
print(help_text)

# Simulate what happens in pysqa_execute_command
print("\nIn pysqa_execute_command:")
print("- subprocess.check_output() is called WITHOUT an 'env' parameter")
print("- This means it inherits parent process's os.environ")
print("- So the modified PYTHONPATH is passed to the sbatch command")
print("\nBUT:")
print("- sbatch submits the job to SLURM")
print("- The job runs in SLURM's compute environment")
print("- SLURM does NOT inherit the submitting shell's environment by default")
print("- You need --export=ALL or explicit env setup in the job script")
EOF

Repository: pyiron/executorlib

Length of output: 786


The modified os.environ will not automatically reach SLURM-queued jobs.

set_current_directory_in_environment() modifies the parent process's os.environ, which works for local subprocess.Popen (which inherits the parent's environment by default). However, PySQA submits jobs to HPC schedulers like SLURM via sbatch, which runs the job in a separate compute environment. SLURM does not automatically inherit the submitting shell's environment variables—the queued job will start with SLURM's default environment, not the modified PYTHONPATH.

To fix this, either explicitly export environment variables in the job submission (e.g., --export=ALL in the sbatch call), embed environment setup commands in the submitted command string, or pass an explicit env parameter through PySQA's submit_job() if its API supports it.

🤖 Prompt for AI Agents
In `@src/executorlib/task_scheduler/file/spawner_pysqa.py` around lines 89 - 90,
The change to os.environ via set_current_directory_in_environment() won't
propagate to SLURM jobs submitted with qa.submit_job(**submit_kwargs); update
the submission so the job receives the PYTHONPATH (and any other env) by either
adding sbatch export flags (e.g. include "--export=ALL" or explicit VAR=VALUE in
the sbatch args inside submit_kwargs), or inject environment setup into the job
command string (prepend export PYTHONPATH=...; cd ...; ...), or pass an explicit
env mapping via submit_kwargs if PySQA's submit_job supports an env parameter;
modify submit_kwargs construction or the call-site around qa.submit_job to add
one of these options and remove reliance on the parent process's os.environ from
set_current_directory_in_environment().

dump(file_name=file_name, data_dict={"queue_id": queue_id})
return queue_id
Expand Down
19 changes: 16 additions & 3 deletions src/executorlib/task_scheduler/file/spawner_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import time
from typing import Optional

from executorlib.standalone.command import set_current_directory_in_environment
from executorlib.standalone.hdf import dump
from executorlib.standalone.inputcheck import check_file_exists

Expand Down Expand Up @@ -53,11 +54,12 @@ def execute_in_subprocess(
)
if backend is not None:
raise ValueError("backend parameter is not supported for subprocess spawner.")
if resource_dict is None:
resource_dict = {}
cwd = resource_dict.get("cwd", cache_directory)
cwd = _get_working_directory(
cache_directory=cache_directory, resource_dict=resource_dict
)
if cwd is not None:
os.makedirs(cwd, exist_ok=True)
set_current_directory_in_environment()
return subprocess.Popen(command, universal_newlines=True, cwd=cwd)


Expand All @@ -71,3 +73,14 @@ def terminate_subprocess(task):
task.terminate()
while task.poll() is None:
time.sleep(0.1)


def _get_working_directory(
cache_directory: Optional[str] = None, resource_dict: Optional[dict] = None
):
if resource_dict is None:
resource_dict = {}
if "cwd" in resource_dict and resource_dict["cwd"] is not None:
return resource_dict["cwd"]
else:
return cache_directory
22 changes: 20 additions & 2 deletions tests/unit/standalone/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
import importlib.util
import sys
import unittest
from executorlib.standalone.command import get_cache_execute_command, get_interactive_execute_command

from executorlib.standalone.command import (
get_cache_execute_command,
get_interactive_execute_command,
set_current_directory_in_environment,
)

skip_mpi4py_test = importlib.util.find_spec("mpi4py") is None

Expand Down Expand Up @@ -92,3 +95,18 @@ def test_get_cache_execute_command_parallel(self):
get_cache_execute_command(cores=2, file_name=file_name, backend="flux", openmpi_oversubscribe=True)
with self.assertRaises(ValueError):
get_cache_execute_command(cores=2, file_name=file_name, backend="flux", exclusive=True)

def test_set_current_directory_in_environment(self):
env = os.environ
if "PYTHONPATH" in env:
python_path = env["PYTHONPATH"]
del env["PYTHONPATH"]
else:
python_path = None
self.assertFalse("PYTHONPATH" in env)
set_current_directory_in_environment()
self.assertTrue("PYTHONPATH" in env)
self.assertEqual(env["PYTHONPATH"], os.getcwd())
env["PYTHONPATH"] = "/my/special/path"
set_current_directory_in_environment()
self.assertEqual(env["PYTHONPATH"], os.getcwd() + ":/my/special/path")