Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 71 additions & 3 deletions _helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import hashlib
import os
from os.path import realpath, join as opj, sep as pathsep
import sys
from os.path import isfile, realpath, join as opj, sep as pathsep
from string import Template
from configparser import ConfigParser


Expand All @@ -24,7 +25,9 @@ def attempt_load_config():

configs = os.listdir(config_dir)
# filter out hidden files and the template config
configs = [f for f in configs if not (f.startswith('template') or f.startswith('.'))]
configs = [f for f in configs if not (f.startswith('template')
or f.startswith('.'))
]
if len(configs) == 1:
config_path = opj(config_dir, configs[0])
config = parse_config(config_path)
Expand All @@ -39,6 +42,17 @@ def attempt_load_config():
location").with_traceback(e.__traceback__)


def fmt_remote_commands(commands):
    """
    Format a list of shell commands to be run in the SshShell instance.

    Necessary because the underlying Python SSH client (Paramiko) won't
    persist state changes between separate command invocations, so we chain
    all commands into a single ``bash -c`` call.

    Parameters
    ----------
    commands : list of str
        Shell commands to run, in order.

    Returns
    -------
    list of str
        An argv-style list: ``['bash', '-c', '<cmd1> && <cmd2> && ...']``.
    """
    executable = ['bash', '-c']
    commands_str = ' && '.join(commands)
    # BUG FIX: the joined command string must be a *single* argv element.
    # The original `executable + commands_str` concatenated list + str,
    # which raises TypeError.
    return executable + [commands_str]


def md5_checksum(filepath):
"""
computes the MD5 checksum of a local file to compare against remote
Expand All @@ -60,6 +74,10 @@ def parse_config(config_path):
"""
parses various user-specific options from config file in configs dir
"""
config_path = realpath(config_path)
if not isfile(config_path):
raise FileNotFoundError(f'Invalid path to config file: {config_path}')

raw_config = ConfigParser(inline_comment_prefixes='#')
with open(config_path, 'r') as f:
raw_config.read_file(f)
Expand All @@ -75,7 +93,6 @@ def prompt_input(question, default=None):
"""
given a question, prompts user for command line input
returns True for 'yes'/'y' and False for 'no'/'n' responses

"""
assert default in ('yes', 'no', None), \
"Default response must be either 'yes', 'no', or None"
Expand Down Expand Up @@ -105,3 +122,54 @@ def prompt_input(question, default=None):
else:
sys.stdout.write("Please respond with either 'yes' (or 'y') \
or 'no' (or 'n')\n")


def write_remote_submitter(remote_shell, job_config, env_activate_cmd,
                           env_deactivate_cmd, submitter_walltime='12:00:00'):
    """
    Write a PBS job-submission script into the remote working directory.

    Parameters
    ----------
    remote_shell : SshShell
        Shell wrapper for the remote host; must provide ``is_dir`` and
        ``write_text`` (assumed project API -- TODO confirm).
    job_config : dict
        Job configuration; reads the 'workingdir', 'jobname', 'modules',
        'env_name', and 'cmd_wrapper' keys.
    env_activate_cmd : str
        Command used to activate the remote Python environment.
    env_deactivate_cmd : str
        Command used to deactivate the remote Python environment.
    submitter_walltime : str, optional
        PBS walltime for the submitter job itself (default '12:00:00').

    Returns
    -------
    str
        Remote path of the submission script that was written.

    Raises
    ------
    ValueError
        If the intended directory path exists but is a file.
    FileNotFoundError
        If the intended directory does not exist.
    """
    remote_dir = job_config['workingdir']
    # TODO: ability to handle custom-named submission script
    submitter_fpath = opj(remote_dir, 'submit_jobs.sh')

    # NOTE: validate with an explicit check rather than `assert` -- asserts
    # are stripped under `python -O`, which would silently skip validation.
    try:
        dir_exists = remote_shell.is_dir(remote_dir)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f"Can't create job submission script in dir: {remote_dir}. "
            "Intended directory does not exist."
        ).with_traceback(e.__traceback__)
    if not dir_exists:
        raise ValueError(
            f"Can't create job submission script in dir: {remote_dir}. "
            "Intended directory is an existing file."
        )

    template_vals = {
        'jobname': job_config['jobname'],
        'walltime': submitter_walltime,
        'modules': job_config['modules'],
        'activate_cmd': env_activate_cmd,
        'deactivate_cmd': env_deactivate_cmd,
        'env_name': job_config['env_name'],
        'cmd_wrapper': job_config['cmd_wrapper'],
        'submitter_script': submitter_fpath
    }

    template = Template(
        """#!/bin/bash -l

#PBS -N ${jobname}-submitter
#PBS -q default
#PBS -l nodes=1:ppn=1
#PBS -l walltime=${walltime}
#PBS -m bea

module load $modules
$activate_cmd $env_name

$cmd_wrapper $submitter_script

$deactivate_cmd"""
    )

    content = template.substitute(template_vals)
    remote_shell.write_text(submitter_fpath, content)
    return submitter_fpath
63 changes: 48 additions & 15 deletions cluster_scripts/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,55 @@
job_config = dict()

# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======
# job creation options
job_config['startdir'] = # path to the foler for this project. Should be something like /dartfs/rc/lab/D/DBIC/CDL/<your_net_id>/<project_name>
# directory location
job_config['startdir'] = # (str) path to the remote folder for this project.
# Should be something like /dartfs/rc/lab/D/DBIC/CDL/<your_net_id>/<project_name>

# job environment options
job_config['modules'] = # (str) modules you need to load for your scripts
# separated by a space (e.g., "python matlab")
job_config['env_type'] = # (str) what kind of Python environment you use
# (NOTE: sole option is currently "conda" -- "venv"
# and "virtualenv" coming soon!)
job_config['env_name'] = # (str) names of (currently, conda) environment
# you want your submission script and jobs to run in
job_config['cmd_wrapper'] = # (str) replace with actual command wrapper
# (e.g. "python", "matlab", etc.)

# runtime options
job_config['jobname'] = # (str) default job name
job_config['queue'] = # (str) options: default, test, largeq
# (when in doubt, use "largeq")
job_config['nnodes'] = # (int) how many nodes to use for this one job
job_config['ppn'] = # (int) how many processors to use for this one
# job (assume 4GB of RAM per processor)
job_config['walltime'] = # (str) maximum runtime, in h:MM:SS
# (e.g., "10:00:00")

# Email update options
job_config['email_updates'] = # (str) what events you want to receive email
# notifications about (see below)
job_config['email_addr'] = # (str) email address where you want job
# notifications sent (see below)

# Torque (the cluster's resource manager) can send you emails about the status
# of your job. These emails will come from "root" (torque@northstar.dartmouth.edu).
# To set your notification preferences for the current batch of to-be-submitted
# jobs, set the 'email_updates' value to a *single* string that consists of some
# combination of the following options:
# + "a" - notify me when a job is aborted (Torque default behavior)
# + "b" - notify me when a job begins
# + "e" - notify me when a job finishes

# If you would like to receive emails for any of these events, set the value of
# 'email_addr' to the address you'd like to receive them
# To receive no emails from Torque, set 'email_updates' to "n" (no mail) and
# 'email_addr' to an empty string ('')

# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======

job_config['datadir'] = opj(job_config['startdir'], 'data')
job_config['workingdir'] = opj(job_config['startdir'], 'scripts')
job_config['template'] = opj(dirname(realpath(__file__)), 'run_job_cluster.sh')
job_config['scriptdir'] = opj(job_config['workingdir'], 'scripts')
job_config['lockdir'] = opj(job_config['workingdir'], 'locks')

# runtime options
job_config['jobname'] = # (str) default job name
job_config['q'] = # (str) options: default, test, largeq (when in doubt, use "largeq")
job_config['nnodes'] = # (int) how many nodes to use for this one job
job_config['ppn'] = # (int) how many processors to use for this one job (assume 4GB of RAM per processor)
job_config['walltime'] = # (str) maximum runtime, in h:MM:SS (e.g., "10:00:00")
job_config['cmd_wrapper'] = # (str) replace with actual command wrapper (e.g. "python", "matlab", etc.)
job_config['modules'] = # (str) modules you need to load for your scripts separated by a space (e.g., "python matlab")
job_config['env_type'] = # (str) what kind of Python environment you use (NOTE: sole option is currently conda -- venv and virtualenv coming soon!)
job_config['env_name'] = # (str) names of (currently, conda) environment you want your submission script and jobs to run in
# ====== MODIFY ONLY THE CODE BETWEEN THESE LINES ======
job_config['lockdir'] = opj(job_config['workingdir'], 'locks')
10 changes: 4 additions & 6 deletions cluster_scripts/run_job_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,20 @@
#PBS -N <config['jobname']>

# specify the queue the job will be added to (if more than 600, use largeq)
#PBS -q <config['q']>
#PBS -q <config['queue']>

# specify the number of cores and nodes (estimate 4GB of RAM per core)
#PBS -l nodes=<config['nnodes']>:ppn=<config['ppn']>

# specify how long the job should run (wall time)
#PBS -l walltime=<config['walltime']>

# set the working directory *of the job* to the specified start directory
cd <config['startdir']>

echo ----

echo ACTIVATING MEMORY DYNAMICS VIRTUAL ENVIRONMENT
module load python
source activate memdyn
echo ACTIVATING VIRTUAL ENVIRONMENT
module load <config['modules']>
<config['env_cmd']> <config['env_name']>

echo ----

Expand Down
Loading