Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 71 additions & 3 deletions _helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import hashlib
import os
from os.path import realpath, join as opj, sep as pathsep
import sys
from os.path import isfile, realpath, join as opj, sep as pathsep
from string import Template
from configparser import ConfigParser


Expand All @@ -24,7 +25,9 @@ def attempt_load_config():

configs = os.listdir(config_dir)
# filter out hidden files and the template config
configs = [f for f in configs if not (f.startswith('template') or f.startswith('.'))]
configs = [f for f in configs if not (f.startswith('template')
or f.startswith('.'))
]
if len(configs) == 1:
config_path = opj(config_dir, configs[0])
config = parse_config(config_path)
Expand All @@ -39,6 +42,17 @@ def attempt_load_config():
location").with_traceback(e.__traceback__)


def fmt_remote_commands(commands):
    """
    Format a list of shell commands to be run in the SshShell instance.

    Necessary because the underlying Python SSH client (Paramiko) won't
    persist state changes between separate command invocations, so we chain
    all commands into a single ``bash -c`` call.

    Parameters
    ----------
    commands : list of str
        Shell commands to run, in order.

    Returns
    -------
    list of str
        An argv-style list: ``['bash', '-c', '<cmd1> && <cmd2> && ...']``.
    """
    executable = ['bash', '-c']
    commands_str = ' && '.join(commands)
    # BUG FIX: the joined command string must be a *single* argv element.
    # The original `executable + commands_str` concatenated list + str,
    # which raises TypeError.
    return executable + [commands_str]


def md5_checksum(filepath):
"""
computes the MD5 checksum of a local file to compare against remote
Expand All @@ -60,6 +74,10 @@ def parse_config(config_path):
"""
parses various user-specific options from config file in configs dir
"""
config_path = realpath(config_path)
if not isfile(config_path):
raise FileNotFoundError(f'Invalid path to config file: {config_path}')

raw_config = ConfigParser(inline_comment_prefixes='#')
with open(config_path, 'r') as f:
raw_config.read_file(f)
Expand All @@ -75,7 +93,6 @@ def prompt_input(question, default=None):
"""
given a question, prompts user for command line input
returns True for 'yes'/'y' and False for 'no'/'n' responses

"""
assert default in ('yes', 'no', None), \
"Default response must be either 'yes', 'no', or None"
Expand Down Expand Up @@ -105,3 +122,54 @@ def prompt_input(question, default=None):
else:
sys.stdout.write("Please respond with either 'yes' (or 'y') \
or 'no' (or 'n')\n")


def write_remote_submitter(remote_shell, job_config, env_activate_cmd,
                           env_deactivate_cmd, submitter_walltime='12:00:00'):
    """
    Write a PBS job-submission script into the remote working directory.

    Parameters
    ----------
    remote_shell : SshShell
        Shell wrapper for the remote host; must provide ``is_dir`` and
        ``write_text`` (assumed project API -- TODO confirm).
    job_config : dict
        Job configuration; reads the 'workingdir', 'jobname', 'modules',
        'env_name', and 'cmd_wrapper' keys.
    env_activate_cmd : str
        Command used to activate the remote Python environment.
    env_deactivate_cmd : str
        Command used to deactivate the remote Python environment.
    submitter_walltime : str, optional
        PBS walltime for the submitter job itself (default '12:00:00').

    Returns
    -------
    str
        Remote path of the submission script that was written.

    Raises
    ------
    ValueError
        If the intended directory path exists but is a file.
    FileNotFoundError
        If the intended directory does not exist.
    """
    remote_dir = job_config['workingdir']
    # TODO: ability to handle custom-named submission script
    submitter_fpath = opj(remote_dir, 'submit_jobs.sh')

    # NOTE: validate with an explicit check rather than `assert` -- asserts
    # are stripped under `python -O`, which would silently skip validation.
    try:
        dir_exists = remote_shell.is_dir(remote_dir)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Can't create job submission script in dir: {remote_dir}. "
            "Intended directory does not exist."
        ).with_traceback(e.__traceback__)
    if not dir_exists:
        raise ValueError(
            f"Can't create job submission script in dir: {remote_dir}. "
            "Intended directory is an existing file."
        )

    template_vals = {
        'jobname': job_config['jobname'],
        'walltime': submitter_walltime,
        'modules': job_config['modules'],
        'activate_cmd': env_activate_cmd,
        'deactivate_cmd': env_deactivate_cmd,
        'env_name': job_config['env_name'],
        'cmd_wrapper': job_config['cmd_wrapper'],
        'submitter_script': submitter_fpath
    }

    template = Template(
        """#!/bin/bash -l

#PBS -N ${jobname}-submitter
#PBS -q default
#PBS -l nodes=1:ppn=1
#PBS -l walltime=${walltime}
#PBS -m bea

module load $modules
$activate_cmd $env_name

$cmd_wrapper $submitter_script

$deactivate_cmd"""
    )

    content = template.substitute(template_vals)
    remote_shell.write_text(submitter_fpath, content)
    return submitter_fpath
63 changes: 48 additions & 15 deletions cluster_scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,55 @@
job_config = dict()

# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======
# job creation options
job_config['startdir'] = # path to the foler for this project. Should be something like /dartfs/rc/lab/D/DBIC/CDL/<your_net_id>/<project_name>
# directory location
job_config['startdir'] = # (str) path to the remote folder for this project.
# Should be something like /dartfs/rc/lab/D/DBIC/CDL/<your_net_id>/<project_name>

# job environment options
job_config['modules'] = # (str) modules you need to load for your scripts
# separated by a space (e.g., "python matlab")
job_config['env_type'] = # (str) what kind of Python environment you use
# (NOTE: sole option is currently "conda" -- "venv"
# and "virtualenv" coming soon!)
job_config['env_name'] = # (str) names of (currently, conda) environment
# you want your submission script and jobs to run in
job_config['cmd_wrapper'] = # (str) replace with actual command wrapper
# (e.g. "python", "matlab", etc.)

# runtime options
job_config['jobname'] = # (str) default job name
job_config['queue'] = # (str) options: default, test, largeq
# (when in doubt, use "largeq")
job_config['nnodes'] = # (int) how many nodes to use for this one job
job_config['ppn'] = # (int) how many processors to use for this one
# job (assume 4GB of RAM per processor)
job_config['walltime'] = # (str) maximum runtime, in h:MM:SS
# (e.g., "10:00:00")

# Email update options
job_config['email_updates'] = # (str) what events you want to receive email
# notifications about (see below)
job_config['email_addr'] = # (str) email address where you want job
# notifications sent (see below)

# Torque (the cluster's resource manager) can send you emails about the status
# of your job. These emails will come from "root" (torque@northstar.dartmouth.edu).
# To set your notification preferences for the current batch of to-be-submitted
# jobs, set the 'email_updates' value to a *single* string that consists of some
# combination of the following options:
# + "a" - notify me when a job is aborted (Torque default behavior)
# + "b" - notify me when a job begins
# + "e" - notify me when a job finishes

# If you would like to receive emails for any of these events, set the value of
# 'email_addr' to the address you'd like to receive them
# To receive no emails from Torque, set 'email_updates' to "n" (no mail) and
# 'email_addr' to an empty string ('')

# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======

job_config['datadir'] = opj(job_config['startdir'], 'data')
job_config['workingdir'] = opj(job_config['startdir'], 'scripts')
job_config['template'] = opj(dirname(realpath(__file__)), 'run_job_cluster.sh')
job_config['scriptdir'] = opj(job_config['workingdir'], 'scripts')
job_config['lockdir'] = opj(job_config['workingdir'], 'locks')

# runtime options
job_config['jobname'] = # (str) default job name
job_config['q'] = # (str) options: default, test, largeq (when in doubt, use "largeq")
job_config['nnodes'] = # (int) how many nodes to use for this one job
job_config['ppn'] = # (int) how many processors to use for this one job (assume 4GB of RAM per processor)
job_config['walltime'] = # (str) maximum runtime, in h:MM:SS (e.g., "10:00:00")
job_config['cmd_wrapper'] = # (str) replace with actual command wrapper (e.g. "python", "matlab", etc.)
job_config['modules'] = # (str) modules you need to load for your scripts separated by a space (e.g., "python matlab")
job_config['env_type'] = # (str) what kind of Python environment you use (NOTE: sole option is currently conda -- venv and virtualenv coming soon!)
job_config['env_name'] = # (str) names of (currently, conda) environment you want your submission script and jobs to run in
# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======
job_config['lockdir'] = opj(job_config['workingdir'], 'locks')
10 changes: 4 additions & 6 deletions cluster_scripts/run_job_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,20 @@
#PBS -N <config['jobname']>

# specify the queue the job will be added to (if more than 600, use largeq)
#PBS -q <config['q']>
#PBS -q <config['queue']>

# specify the number of cores and nodes (estimate 4GB of RAM per core)
#PBS -l nodes=<config['nnodes']>:ppn=<config['ppn']>

# specify how long the job should run (wall time)
#PBS -l walltime=<config['walltime']>

# set the working directory *of the job* to the specified start directory
cd <config['startdir']>

echo ----

echo ACTIVATING MEMORY DYNAMICS VIRTUAL ENVIRONMENT
module load python
source activate memdyn
echo ACTIVATING VIRTUAL ENVIRONMENT
module load <config['modules']>
<config['env_cmd']> <config['env_name']>

echo ----

Expand Down
Loading