Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions auto-champ.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@
elif args.launch:

if "launch_template" in env_con.fields.keys():
if env_con.fields["HPRC"]:
print("Launching HPRC Job.")
if env_con.fields["runner_format"] == 'slurm':
print("Launching Slurm Job.")
utils.check_continue(env_con.fields["yall"])
env_con.load_launch_template()
launcher.launch_handler(env_con)
launcher.launch(env_con)

elif args.collect:

Expand Down
4 changes: 2 additions & 2 deletions autochamp-config.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
champsim_root = /your/path/here/
champsim_root = /your/path/here

#=============Parameters for building multiple champsim binaries ============#
#Path to the configuration files
Expand Down Expand Up @@ -28,7 +28,7 @@ launch_file = temp.job
#============================================================================#

#==================# Parameters for running multiple sims #==================#
HPRC = 1
runner_format = popen
enable_json_output = 1
warmup = 0000000
sim_inst = 100000000
Expand Down
30 changes: 14 additions & 16 deletions champc_lib/config_env.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import os
import re
import champc_lib.utils as utils
Expand All @@ -11,8 +12,8 @@ def __init__(self):

self.required_fields = ["champsim_root", "build_list", "configs_path", "results_path", "workload_path", "binaries_path",
"limit_hours", "ntasks", "account", "workload_list", "warmup", "sim_inst",
"results_collect_path", "HPRC","enable_json_output", "stats_list"]
self.required_bool = ["HPRC", "enable_json_output"]
"results_collect_path", "runner_format","enable_json_output", "stats_list"]
self.required_bool = ["enable_json_output"]
self.optional_fields = ["launch_file", "baseline", "launch_template","yall"]
self.ignore_fields = ["output_name", "result_str"]

Expand Down Expand Up @@ -42,23 +43,20 @@ def load_launch_template(self):
if not os.path.exists(self.fields["launch_template"]):
print("ERROR: LAUNCH TEMPLATE DEFINED BUT DOES NOT EXIST: " + self.fields["launch_template"] + "\n")
exit()

lt = open(self.fields["launch_template"], "r")

self.fields["launch_fields"] = []

for line in lt:
line = line.strip()
if "=" not in line:
continue
matches = re.findall(r"{([^{}]*)}", line)
for match in matches:
if match not in self.fields.keys() and match not in self.ignore_fields:
print("{} defined in template file but not in control.cfg\n".format(match))
utils.check_continue(self.fields["yall"])
self.fields["launch_fields"].append(match)

lt.close()
with open(self.fields["launch_template"], "r") as lt:
for line in lt:
line = line.strip()
if "=" not in line:
continue
matches = re.findall(r"{([^{}]*)}", line)
for match in matches:
if match not in self.fields.keys() and match not in self.ignore_fields:
print("{} defined in template file but not in control.cfg\n".format(match))
utils.check_continue(self.fields["yall"])
self.fields["launch_fields"].append(match)

def build_check(self):
if self.fields["build_list"] == "":
Expand Down
245 changes: 67 additions & 178 deletions champc_lib/launch.py
Original file line number Diff line number Diff line change
@@ -1,186 +1,75 @@
import sys
import os
from datetime import date
import time
import subprocess
import re
import champc_lib.utils as utils
import itertools

def check_load(env_con):
username = env_con.fields["username"]
job_limit = int(env_con.fields["job_limit"])
if env_con.fields["HPRC"]:
procs_running = int(subprocess.check_output("squeue -u " + username + " | wc -l",\
stderr = subprocess.STDOUT, shell = True)) - 1
print(time.strftime("%H:%M:%S", time.localtime()) + ": Jobs running " + str(procs_running) + " Limit " + str(job_limit))
if procs_running < job_limit:
return False
else:
time.sleep(30)
return True
else:
procs_running = int(subprocess.check_output("ps -u {} | grep \"{}\" | wc -l".format(username, str(env_con.fields["current_binary"])),\
stderr = subprocess.STDOUT, shell = True))
print("Procs running: {} Bin {}".format(procs_running, str(env_con.fields["current_binary"])))
print(time.strftime("%H:%M:%S", time.localtime()) + ": Jobs running " + str(procs_running) + " Limit " + str(job_limit))
if procs_running < job_limit:
return False
else:
time.sleep(30)
return True
import champc_lib.popen_runner as popen_runner
import champc_lib.slurm_runner as slurm_runner
import champc_lib.utils as utils

def create_results_directory(env_con):
    """Create and return a fresh, numbered results directory.

    Layout is <results_path>/<today>/<num_cores>_cores/<N>/ where N is the
    smallest positive integer not already present, so repeated launches on
    the same day never clobber earlier results.
    """
    results_path = os.path.join(env_con.fields["results_path"],
                                str(date.today()),
                                str(env_con.fields["num_cores"]) + "_cores")

    # Probe for the first unused run number under today's directory.
    num_dirs = 1
    while os.path.isdir(os.path.join(results_path, str(num_dirs))):
        num_dirs += 1

    results_path = os.path.join(results_path, str(num_dirs))
    print("Creating new directory:", results_path)
    # makedirs creates all missing parents; exist_ok guards against a race
    # with a concurrent launcher instance creating the same run directory.
    os.makedirs(results_path, exist_ok=True)
    return results_path

def launch_simulations(env_con, launch_str, result_str, output_name):
launch_str = launch_str.strip() + " &> {}".format(result_str)
print("Final CMD: {}".format(launch_str))
while check_load(env_con):
continue

os.system(launch_str)

def sbatch_launch(env_con, launch_str, result_str, output_name):

while check_load(env_con):
continue

temp_launch = open(env_con.fields["launch_file"], "w")

#open the template file
tmpl = open(env_con.fields["launch_template"], "r")

for line in tmpl:
matches = re.findall(r"{([^{}]*)}", line)
out_line = line
for match in matches:
if match not in env_con.fields.keys() and match not in env_con.ignore_fields:
print("{}: Not defined and required for launching\n".format(match))
exit()
if match in env_con.ignore_fields:
if match == "result_str":
out_line = out_line.replace("{" + match + "}", result_str)
elif match == "output_name":
print(output_name)
out_line = out_line.replace("{" + match + "}", output_name)
else:
out_line = out_line.replace("{" + match + "}", env_con.fields[match])

temp_launch.write(out_line.strip() + "\n")

temp_launch.write(launch_str)
temp_launch.close()

print("Running command: " + "sbatch " + env_con.fields["launch_file"])
os.system("sbatch " + env_con.fields["launch_file"])
os.system("rm " + env_con.fields["launch_file"])

def launch_handler(env_con):

#init the structs holding the list of launching items
binaries = []
workloads = []

with open(env_con.fields["binary_list"], "r") as binary_list_file:
#gather each binary
binaries = list(utils.filter_comments_and_blanks(binary_list_file))

with open(env_con.fields["workload_list"], "r") as workloads_list_file:
workloads = list(utils.filter_comments_and_blanks(workloads_list_file))


#workload director
workload_dir = env_con.fields["workload_path"]



print("Binaries launching: ")
print("Launching workloads: ")
count = 0

#This prints the workloads in 4 columns
for a in workloads:
count += 1
print(a, end="\t")
if count == 4:
count = 0
print()
print()

print("Launching " + str((len(binaries) * len(workloads))) + " continue? [Y/N]")
cont = input().lower()
if cont != "y":
print("Exiting job launch...")
exit()
print("Launching jobs...")

binaries_path = env_con.fields["binaries_path"]
results_path = ""

if env_con.output_path == "":
results_path = create_results_directory(env_con)
else:
results_path = env_con.output_path

warmup = env_con.fields["warmup"]
sim_inst = env_con.fields["sim_inst"]

results_str = ""
launch_str = "{}{} -warmup_instructions {} -simulation_instructions {} -traces {}\n"
results_output_s = ""
trace_str = ""
output_name = ""
num_launch = 0

print("Job binaries: {}".format(binaries))

for a in binaries:
for b in workloads:
splitload = b.split(" ")

env_con.fields["current_binary"] = a

#supporting multicore by iterating through the workload list
if(len(splitload) > 1):
for subwl in splitload:
#create results file name
results_output_s += subwl.strip() + "_"
#trace str needs to include wl directory since it references each trace's location
trace_str += workload_dir.strip() + subwl.strip() + " "
results_output_s += "multi"
else:
results_output_s = b
trace_str = workload_dir + b

json_flag = ''
if env_con.fields["enable_json_output"]:
json_flag = " -j"

output_name = results_output_s + "_" + a + "_"
results_str = results_path + results_output_s + "_bin:" + a
f_launch_str = launch_str.format(binaries_path, a, str(env_con.fields["warmup"]), str(env_con.fields["sim_inst"]) + json_flag, trace_str)
print("Launching command: {}".format(f_launch_str))
print("Writing results to: {}".format(results_str))
if env_con.fields["HPRC"]:
sbatch_launch(env_con, f_launch_str, results_str, output_name)
else:
launch_simulations(env_con, f_launch_str, results_str, output_name)
num_launch += 1
print("Launching Job " + str(num_launch))
def get_command_tuple(binary, workload, env_con):
    """Build the simulator invocation for one (binary, workload) pair.

    Returns a tuple of command-line tokens:
      <binaries_path>/<binary> -warmup_instructions W -simulation_instructions S
      [-j] -- <trace paths> &> <output file>

    BUGFIX: when JSON output is disabled, the flag is omitted entirely
    instead of leaving an empty-string token in the tuple (an empty string
    becomes a spurious empty argv element under the popen runner).
    NOTE(review): the trailing "&>" / output-file pair is a shell
    redirection; runners that bypass a shell must handle it themselves.
    """
    # split() rather than split(" ") so repeated whitespace in the workload
    # list cannot yield empty trace names.
    subworkloads = workload.split()

    # Each trace is referenced by its location under the workload directory.
    trace_str = ' '.join(os.path.join(env_con.fields['workload_path'], subwl)
                         for subwl in subworkloads)

    # Results file name: the workload itself, or "<w1>_<w2>_..._multi" for
    # multicore runs, suffixed with the binary name.
    if len(subworkloads) == 1:
        results_output_s = subworkloads[0]
    else:
        results_output_s = '_'.join((*subworkloads, "multi"))
    output_name = '_'.join((results_output_s, binary))

    cmd = [
        os.path.join(env_con.fields["binaries_path"], binary),
        "-warmup_instructions", str(env_con.fields["warmup"]),
        "-simulation_instructions", str(env_con.fields["sim_inst"]),
    ]
    if env_con.fields["enable_json_output"]:
        cmd.append('-j')
    cmd += ["--", trace_str, "&>", os.path.join(env_con.output_path, output_name)]
    return tuple(cmd)

def launch(env_con):
    """Read the binary and workload lists, confirm with the user, and hand
    the generated command tuples to the configured runner backend."""
    with open(env_con.fields['binary_list'], 'r') as bin_file:
        # Gather each binary, skipping comments and blank lines.
        binaries = list(utils.filter_comments_and_blanks(bin_file))

    print("Binaries launching: ")
    for binary in binaries:
        print(binary)
    print()

    with open(env_con.fields['workload_list'], 'r') as wl_file:
        workloads = list(utils.filter_comments_and_blanks(wl_file))

    # Print the workloads four to a row.
    print("Launching workloads: ")
    columns = [iter(workloads)] * 4
    for row in itertools.zip_longest(*columns, fillvalue=''):
        print('\t'.join(row))
    print()

    # One command per (binary, workload) combination.
    launch_cmds = [get_command_tuple(binary, workload, env_con)
                   for binary, workload in itertools.product(binaries, workloads)]
    answer = input("Launching " + str(len(launch_cmds)) + " continue? [y/N] ")
    if answer.lower() != "y":
        sys.exit("Exiting job launch...")
    print("Launching jobs...")

    # Respect a caller-supplied output path; otherwise create a fresh one.
    if not env_con.output_path:
        env_con.output_path = create_results_directory(env_con)

    runner = env_con.fields["runner_format"]
    if runner == 'slurm':
        slurm_runner.run(launch_cmds, env_con)
    elif runner == 'echo':
        # Dry run: print the commands instead of executing them.
        for cmd in launch_cmds:
            print(*cmd)
    else:
        popen_runner.run(launch_cmds, env_con)
32 changes: 32 additions & 0 deletions champc_lib/popen_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import subprocess
import time
import collections
import os
import sys
import itertools
from timeit import default_timer as timer
from datetime import timedelta

def begin(fname, *args):
    """Start one simulation process.

    Opens *fname* for writing, launches *args* with stdout and stderr
    captured into it, and returns the (file, Popen) pair so the caller can
    poll the process and close the log when it exits.
    """
    f = open(fname, 'wt')
    try:
        return f, subprocess.Popen(args, stdout=f, stderr=f)
    except Exception:
        # BUGFIX: don't leak the log file handle if the process fails to
        # spawn (e.g. the binary does not exist).
        f.close()
        raise

def check_finish(f, p):
    """Poll process *p*; once it has exited, close its log file *f*.

    Returns the process's exit code, or None while it is still running.
    """
    code = p.poll()
    if code is None:
        return None
    f.close()
    return code

def run(runs, env_con):
    """Execute *runs* locally, keeping at most job_limit processes alive.

    Each command's combined stdout/stderr is captured to "<index>.txt" in
    the current directory, where index is the 0-based launch order.
    """
    start = timer()  # NOTE(review): start time is captured but currently unused
    pending = collections.deque(runs)
    job_limit = int(env_con.fields['job_limit'])
    running = []
    launched = 0

    while pending or running:
        # Drop finished processes; check_finish also closes their log files.
        running = [pair for pair in running if check_finish(*pair) is None]

        # Top the pool back up to the concurrency limit.
        while pending and len(running) < job_limit:
            running.append(begin(str(launched) + '.txt', *pending.popleft()))
            launched += 1
        time.sleep(1)

29 changes: 29 additions & 0 deletions champc_lib/slurm_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os

# Slurm batch-file header; placeholders are filled from env_con.fields.
header_fmtstr = '''#!/bin/bash
#SBATCH --get-user-env=L
#SBATCH --time={limit_hours}:00:00
#SBATCH --ntasks={ntasks}
#SBATCH --mem=1024M
#SBATCH --mail-type=FAIL
#SBATCH --mail-user={mail}
#SBATCH --account={account}
'''

# NOTE(review): these per-job directives previously sat in a dead (no-op)
# string expression and were never written to the batch file:
#   #SBATCH --job-name={output_name}
#   #SBATCH --output={result_str}.%j

def run(runs, env_con):
    """Submit *runs* as a single Slurm job array.

    Writes a batch script in which array task N extracts command line N
    from an inline heredoc and pipes it to bash, then submits the script
    with sbatch and leaves it on disk.
    """
    with open(env_con.fields['launch_file'], 'w') as launch_file:
        launch_file.write(header_fmtstr.format(**env_con.fields))
        # The %limit suffix throttles how many array tasks run at once.
        launch_file.write('#SBATCH --array=1-{}%{}\n'.format(len(runs), env_con.fields['job_limit']))

        # BUGFIX: the pipe to bash must appear on the sed command line.
        # A heredoc terminator line must contain only the delimiter, so the
        # previous "EOF | bash" line never terminated the heredoc and the
        # selected command was never executed.
        launch_file.write('sed -n "$SLURM_ARRAY_TASK_ID p" <<EOF | bash\n')
        for r in runs:
            launch_file.write(' '.join(r) + '\n')
        launch_file.write('EOF\n')

    print("Running command:", "sbatch", env_con.fields["launch_file"])
    os.system('sbatch ' + env_con.fields['launch_file'])