diff --git a/auto-champ.py b/auto-champ.py
index ce8a878..bd2991c 100755
--- a/auto-champ.py
+++ b/auto-champ.py
@@ -39,11 +39,11 @@ elif args.launch:
     if "launch_template" in env_con.fields.keys():
-        if env_con.fields["HPRC"]:
-            print("Launching HPRC Job.")
+        if env_con.fields["runner_format"] == 'slurm':
+            print("Launching Slurm Job.")
             utils.check_continue(env_con.fields["yall"])
             env_con.load_launch_template()
-    launcher.launch_handler(env_con)
+    launcher.launch(env_con)
 
 elif args.collect:
diff --git a/autochamp-config.cfg b/autochamp-config.cfg
index a5f0df8..263dfff 100644
--- a/autochamp-config.cfg
+++ b/autochamp-config.cfg
@@ -1,4 +1,4 @@
-champsim_root = /your/path/here/
+champsim_root = /your/path/here
 
 #=============Parameters for building multiple champsim binaries ============#
 #Path to the configuration files
@@ -28,7 +28,7 @@ launch_file = temp.job
 #============================================================================#
 
 #==================# Parameters for running multiple sims #==================#
-HPRC = 1
+runner_format = popen
 enable_json_output = 1
 warmup = 0000000
 sim_inst = 100000000
diff --git a/champc_lib/config_env.py b/champc_lib/config_env.py
index acaa2c6..6c1b7fa 100644
--- a/champc_lib/config_env.py
+++ b/champc_lib/config_env.py
@@ -1,3 +1,4 @@
+import sys
 import os
 import re
 import champc_lib.utils as utils
@@ -11,8 +12,8 @@ def __init__(self):
         self.required_fields = ["champsim_root", "build_list", "configs_path", "results_path",
                                 "workload_path", "binaries_path", "limit_hours", "ntasks",
                                 "account", "workload_list", "warmup", "sim_inst",
-                                "results_collect_path", "HPRC","enable_json_output", "stats_list"]
-        self.required_bool = ["HPRC", "enable_json_output"]
+                                "results_collect_path", "runner_format","enable_json_output", "stats_list"]
+        self.required_bool = ["enable_json_output"]
 
         self.optional_fields = ["launch_file", "baseline", "launch_template","yall"]
         self.ignore_fields = ["output_name", "result_str"]
@@ -42,23 +43,20 @@ def load_launch_template(self):
         if not os.path.exists(self.fields["launch_template"]):
             print("ERROR: LAUNCH TEMPLATE DEFINED BUT DOES NOT EXIST: " + self.fields["launch_template"] + "\n")
             exit()
-
-        lt = open(self.fields["launch_template"], "r")
         self.fields["launch_fields"] = []
-        for line in lt:
-            line = line.strip()
-            if "=" not in line:
-                continue
-            matches = re.findall(r"{([^{}]*)}", line)
-            for match in matches:
-                if match not in self.fields.keys() and match not in self.ignore_fields:
-                    print("{} defined in template file but not in control.cfg\n".format(match))
-                    utils.check_continue(self.fields["yall"])
-                self.fields["launch_fields"].append(match)
-
-        lt.close()
+        with open(self.fields["launch_template"], "r") as lt:
+            for line in lt:
+                line = line.strip()
+                if "=" not in line:
+                    continue
+                matches = re.findall(r"{([^{}]*)}", line)
+                for match in matches:
+                    if match not in self.fields.keys() and match not in self.ignore_fields:
+                        print("{} defined in template file but not in control.cfg\n".format(match))
+                        utils.check_continue(self.fields["yall"])
+                    self.fields["launch_fields"].append(match)
 
     def build_check(self):
         if self.fields["build_list"] == "":
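For reference, the placeholder scan kept above pulls every `{field}` token out of the launch template so unknown keys can be flagged before launch. A minimal standalone sketch of that extraction (the template line here is a made-up example):

    import re

    line = "#SBATCH --time={limit_hours}:00:00"   # hypothetical template line
    print(re.findall(r"{([^{}]*)}", line))        # ['limit_hours']
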
diff --git a/champc_lib/launch.py b/champc_lib/launch.py
index d930e7e..d482d13 100644
--- a/champc_lib/launch.py
+++ b/champc_lib/launch.py
@@ -1,186 +1,75 @@
 import sys
 import os
 from datetime import date
-import time
-import subprocess
-import re
-import champc_lib.utils as utils
+import itertools
 
-def check_load(env_con):
-    username = env_con.fields["username"]
-    job_limit = int(env_con.fields["job_limit"])
-    if env_con.fields["HPRC"]:
-        procs_running = int(subprocess.check_output("squeue -u " + username + " | wc -l",\
-                            stderr = subprocess.STDOUT, shell = True)) - 1
-        print(time.strftime("%H:%M:%S", time.localtime()) + ": Jobs running " + str(procs_running) + " Limit " + str(job_limit))
-        if procs_running < job_limit:
-            return False
-        else:
-            time.sleep(30)
-            return True
-    else:
-        procs_running = int(subprocess.check_output("ps -u {} | grep \"{}\" | wc -l".format(username, str(env_con.fields["current_binary"])),\
-                            stderr = subprocess.STDOUT, shell = True))
-        print("Procs running: {} Bin {}".format(procs_running, str(env_con.fields["current_binary"])))
-        print(time.strftime("%H:%M:%S", time.localtime()) + ": Jobs running " + str(procs_running) + " Limit " + str(job_limit))
-        if procs_running < job_limit:
-            return False
-        else:
-            time.sleep(30)
-            return True
+import champc_lib.popen_runner as popen_runner
+import champc_lib.slurm_runner as slurm_runner
+import champc_lib.utils as utils
 
 def create_results_directory(env_con):
-
-    results_path = env_con.fields["results_path"]
-    num_cores = env_con.fields["num_cores"]
-    if not os.path.isdir(results_path + str(date.today()) + "/" + str(num_cores) + "_cores/1/"):
-        print("Creating new directory: " + results_path + str(date.today()) + "/" + str(num_cores) + "_cores/1/")
-        os.system("mkdir " + results_path + str(date.today()) + "/")
-        os.system("mkdir " + results_path + str(date.today()) + "/" + str(num_cores) + "_cores/")
-        os.system("mkdir " + results_path + str(date.today()) + "/" + str(num_cores) + "_cores/1/")
-        results_path += str(date.today()) + "/" + str(num_cores) + "_cores/1/"
-    else:
-        num_dirs = 1
-        for f in os.listdir(results_path + str(date.today()) + "/" + str(num_cores) + "_cores/"):
-            if os.path.isdir(results_path + str(date.today()) + "/" + str(num_cores) + "_cores/" + f):
-                num_dirs += 1
-        print("Creating new results directory: " + results_path + str(date.today()) + "/" + str(num_cores) + "_cores/" + str(num_dirs) + "/")
-        os.system("mkdir " + results_path + str(date.today()) + "/" + str(num_cores) + "_cores/" + str(num_dirs) + "/")
-        results_path += str(date.today()) + "/" + str(num_cores) + "_cores/" + str(num_dirs) + "/"
+    results_path = os.path.join(env_con.fields["results_path"], str(date.today()), str(env_con.fields["num_cores"]) + "_cores")
+    num_dirs = 1
+    while os.path.isdir(os.path.join(results_path, str(num_dirs))):
+        num_dirs += 1
+
+    results_path = os.path.join(results_path, str(num_dirs))
+    print("Creating new directory:", results_path)
+    os.makedirs(results_path, exist_ok=True)
 
     return results_path
 
-def launch_simulations(env_con, launch_str, result_str, output_name):
-    launch_str = launch_str.strip() + " &> {}".format(result_str)
-    print("Final CMD: {}".format(launch_str))
-    while check_load(env_con):
-        continue
-
-    os.system(launch_str)
-
-def sbatch_launch(env_con, launch_str, result_str, output_name):
-
-    while check_load(env_con):
-        continue
-
-    temp_launch = open(env_con.fields["launch_file"], "w")
-
-    #open the template file
-    tmpl = open(env_con.fields["launch_template"], "r")
-
-    for line in tmpl:
-        matches = re.findall(r"{([^{}]*)}", line)
-        out_line = line
-        for match in matches:
-            if match not in env_con.fields.keys() and match not in env_con.ignore_fields:
-                print("{}: Not defined and required for launching\n".format(match))
-                exit()
-            if match in env_con.ignore_fields:
-                if match == "result_str":
-                    out_line = out_line.replace("{" + match + "}", result_str)
-                elif match == "output_name":
-                    print(output_name)
-                    out_line = out_line.replace("{" + match + "}", output_name)
-            else:
-                out_line = out_line.replace("{" + match + "}", env_con.fields[match])
-
-        temp_launch.write(out_line.strip() + "\n")
-
-    temp_launch.write(launch_str)
-    temp_launch.close()
-
-    print("Running command: " + "sbatch " + env_con.fields["launch_file"])
-    os.system("sbatch " + env_con.fields["launch_file"])
-    os.system("rm " + env_con.fields["launch_file"])
-
-def launch_handler(env_con):
-
-    #init the structs holding the list of launching items
-    binaries = []
-    workloads = []
-
-    with open(env_con.fields["binary_list"], "r") as binary_list_file:
-        #gather each binary
-        binaries = list(utils.filter_comments_and_blanks(binary_list_file))
-
-    with open(env_con.fields["workload_list"], "r") as workloads_list_file:
-        workloads = list(utils.filter_comments_and_blanks(workloads_list_file))
-
-    #workload director
-    workload_dir = env_con.fields["workload_path"]
-
-    print("Binaries launching: ")
-    print("Launching workloads: ")
-    count = 0
-
-    #This prints the workloads in 4 columns
-    for a in workloads:
-        count += 1
-        print(a, end="\t")
-        if count == 4:
-            count = 0
-            print()
-    print()
-
-    print("Launching " + str((len(binaries) * len(workloads))) + " continue? [Y/N]")
-    cont = input().lower()
-    if cont != "y":
-        print("Exiting job launch...")
-        exit()
-    print("Launching jobs...")
-
-    binaries_path = env_con.fields["binaries_path"]
-    results_path = ""
-
-    if env_con.output_path == "":
-        results_path = create_results_directory(env_con)
-    else:
-        results_path = env_con.output_path
-
-    warmup = env_con.fields["warmup"]
-    sim_inst = env_con.fields["sim_inst"]
-
-    results_str = ""
-    launch_str = "{}{} -warmup_instructions {} -simulation_instructions {} -traces {}\n"
-    results_output_s = ""
-    trace_str = ""
-    output_name = ""
-    num_launch = 0
-
-    print("Job binaries: {}".format(binaries))
-
-    for a in binaries:
-        for b in workloads:
-            splitload = b.split(" ")
-
-            env_con.fields["current_binary"] = a
-
-            #supporting multicore by iterating through the workload list
-            if(len(splitload) > 1):
-                for subwl in splitload:
-                    #create results file name
-                    results_output_s += subwl.strip() + "_"
-                    #trace str needs to include wl directory since it references each trace's location
-                    trace_str += workload_dir.strip() + subwl.strip() + " "
-                results_output_s += "multi"
-            else:
-                results_output_s = b
-                trace_str = workload_dir + b
-
-            json_flag = ''
-            if env_con.fields["enable_json_output"]:
-                json_flag = " -j"
-
-            output_name = results_output_s + "_" + a + "_"
-            results_str = results_path + results_output_s + "_bin:" + a
-            f_launch_str = launch_str.format(binaries_path, a, str(env_con.fields["warmup"]), str(env_con.fields["sim_inst"]) + json_flag, trace_str)
-            print("Launching command: {}".format(f_launch_str))
-            print("Writing results to: {}".format(results_str))
-            if env_con.fields["HPRC"]:
-                sbatch_launch(env_con, f_launch_str, results_str, output_name)
-            else:
-                launch_simulations(env_con, f_launch_str, results_str, output_name)
-            num_launch += 1
-            print("Launching Job " + str(num_launch))
+def get_command_tuple(binary, workload, env_con):
+    launch_str = ("{binary}", "-warmup_instructions", "{warmup_instructions}", "-simulation_instructions", "{simulation_instructions}", "{json}", "--", "{traces}", "&>", "{output_name}")
+
+    splitload = workload.split(" ")
+
+    #trace str needs to include wl directory since it references each trace's location
+    trace_str = ' '.join(os.path.join(env_con.fields['workload_path'], subwl) for subwl in splitload)
+
+    # create results file name
+    results_output_s = workload if len(splitload) == 1 else '_'.join((*splitload, "multi"))
+    output_name = '_'.join((results_output_s, binary))
+
+    json_flag = '-j' if env_con.fields["enable_json_output"] else ''
+
+    return tuple(arg.format(
+        binary=os.path.join(env_con.fields["binaries_path"], binary),
+        warmup_instructions=env_con.fields["warmup"],
+        simulation_instructions=env_con.fields["sim_inst"],
+        json=json_flag,
+        traces=trace_str,
+        output_name=os.path.join(env_con.output_path, output_name)
+    ) for arg in launch_str)
+
+def launch(env_con):
+    with open(env_con.fields['binary_list'], 'r') as binary_list_file:
+        binaries = list(utils.filter_comments_and_blanks(binary_list_file)) #gather each binary
+
+    print("Binaries launching: ")
+    for a in binaries:
+        print(a)
+    print()
+
+    with open(env_con.fields['workload_list'], 'r') as workloads_list_file:
+        workloads = list(utils.filter_comments_and_blanks(workloads_list_file))
+
+    #This prints the workloads in 4 columns
+    print("Launching workloads: ")
+    workload_print_groups = [iter(workloads)] * 4
+    for a in itertools.zip_longest(*workload_print_groups, fillvalue=''):
+        print('\t'.join(a))
+    print()
+
+    launch_cmds = [get_command_tuple(b,w,env_con) for b,w in itertools.product(binaries, workloads)]
+    if input("Launching " + str(len(launch_cmds)) + " continue? [y/N] ").lower() != "y":
+        sys.exit("Exiting job launch...")
+    print("Launching jobs...")
+
+    env_con.output_path = env_con.output_path or create_results_directory(env_con)
+    if env_con.fields["runner_format"] == 'slurm':
+        slurm_runner.run(launch_cmds, env_con)
+    elif env_con.fields["runner_format"] == 'echo':
+        for r in launch_cmds:
+            print(*r)
+    else:
+        popen_runner.run(launch_cmds, env_con)
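To make the new command construction concrete: with a hypothetical binary `bimodal`, a single-trace workload `gcc_13B.trace.xz`, and assumed config paths, `get_command_tuple` expands to an argv tuple rather than one shell string:

    # assumed field values for illustration only:
    # binaries_path=/sims/bin, workload_path=/sims/traces, output_path=/sims/results/1
    ('/sims/bin/bimodal', '-warmup_instructions', '0000000',
     '-simulation_instructions', '100000000', '-j', '--',
     '/sims/traces/gcc_13B.trace.xz', '&>',
     '/sims/results/1/gcc_13B.trace.xz_bimodal')

Note that `&>` and the output name travel as ordinary tuple elements; the redirect only takes effect when the tuple is joined into a single line and run through a shell, while the popen runner below redirects stdout/stderr to its own numbered `.txt` files.
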
"output_name": - print(output_name) - out_line = out_line.replace("{" + match + "}", output_name) - else: - out_line = out_line.replace("{" + match + "}", env_con.fields[match]) - - temp_launch.write(out_line.strip() + "\n") - - temp_launch.write(launch_str) - temp_launch.close() - - print("Running command: " + "sbatch " + env_con.fields["launch_file"]) - os.system("sbatch " + env_con.fields["launch_file"]) - os.system("rm " + env_con.fields["launch_file"]) - -def launch_handler(env_con): - - #init the structs holding the list of launching items - binaries = [] - workloads = [] - - with open(env_con.fields["binary_list"], "r") as binary_list_file: - #gather each binary - binaries = list(utils.filter_comments_and_blanks(binary_list_file)) - - with open(env_con.fields["workload_list"], "r") as workloads_list_file: - workloads = list(utils.filter_comments_and_blanks(workloads_list_file)) - - - #workload director - workload_dir = env_con.fields["workload_path"] - - - - print("Binaries launching: ") - print("Launching workloads: ") - count = 0 - - #This prints the workloads in 4 columns - for a in workloads: - count += 1 - print(a, end="\t") - if count == 4: - count = 0 - print() - print() - - print("Launching " + str((len(binaries) * len(workloads))) + " continue? [Y/N]") - cont = input().lower() - if cont != "y": - print("Exiting job launch...") - exit() - print("Launching jobs...") - - binaries_path = env_con.fields["binaries_path"] - results_path = "" - - if env_con.output_path == "": - results_path = create_results_directory(env_con) - else: - results_path = env_con.output_path - - warmup = env_con.fields["warmup"] - sim_inst = env_con.fields["sim_inst"] - - results_str = "" - launch_str = "{}{} -warmup_instructions {} -simulation_instructions {} -traces {}\n" - results_output_s = "" - trace_str = "" - output_name = "" - num_launch = 0 - - print("Job binaries: {}".format(binaries)) - - for a in binaries: - for b in workloads: - splitload = b.split(" ") - - env_con.fields["current_binary"] = a - - #supporting multicore by iterating through the workload list - if(len(splitload) > 1): - for subwl in splitload: - #create results file name - results_output_s += subwl.strip() + "_" - #trace str needs to include wl directory since it references each trace's location - trace_str += workload_dir.strip() + subwl.strip() + " " - results_output_s += "multi" - else: - results_output_s = b - trace_str = workload_dir + b - - json_flag = '' - if env_con.fields["enable_json_output"]: - json_flag = " -j" - - output_name = results_output_s + "_" + a + "_" - results_str = results_path + results_output_s + "_bin:" + a - f_launch_str = launch_str.format(binaries_path, a, str(env_con.fields["warmup"]), str(env_con.fields["sim_inst"]) + json_flag, trace_str) - print("Launching command: {}".format(f_launch_str)) - print("Writing results to: {}".format(results_str)) - if env_con.fields["HPRC"]: - sbatch_launch(env_con, f_launch_str, results_str, output_name) - else: - launch_simulations(env_con, f_launch_str, results_str, output_name) - num_launch += 1 - print("Launching Job " + str(num_launch)) +def get_command_tuple(binary, workload, env_con): + launch_str = ("{binary}", "-warmup_instructions", "{warmup_instructions}", "-simulation_instructions", "{simulation_instructions}", "{json}", "--", "{traces}", "&>", "{output_name}") + + splitload = workload.split(" ") + + #trace str needs to include wl directory since it references each trace's location + trace_str = ' '.join(os.path.join(env_con.fields['workload_path'], 
diff --git a/champc_lib/slurm_runner.py b/champc_lib/slurm_runner.py
new file mode 100644
index 0000000..7508b97
--- /dev/null
+++ b/champc_lib/slurm_runner.py
@@ -0,0 +1,29 @@
+import os
+
+header_fmtstr = '''#!/bin/bash
+#SBATCH --get-user-env=L
+#SBATCH --time={limit_hours}:00:00
+#SBATCH --ntasks={ntasks}
+#SBATCH --mem=1024M
+#SBATCH --mail-type=FAIL
+#SBATCH --mail-user={mail}
+#SBATCH --account={account}
+'''
+
+'''
+#SBATCH --job-name={output_name}
+#SBATCH --output={result_str}.%j
+'''
+
+def run(runs, env_con):
+    with open(env_con.fields['launch_file'], 'w') as launch_file:
+        launch_file.write(header_fmtstr.format(**env_con.fields))
+        launch_file.write('#SBATCH --array=1-{}%{}\n'.format(len(runs), env_con.fields['job_limit']))
+
+        cmd_file = env_con.fields['launch_file'] + '.cmds' # assumed name; tail of this hunk was truncated
+        launch_file.write('sed -n "$SLURM_ARRAY_TASK_ID p" < {} | bash\n'.format(cmd_file))
+
+    with open(cmd_file, 'w') as commands:
+        for r in runs:
+            commands.write(' '.join(r) + '\n')
+    os.system('sbatch ' + env_con.fields['launch_file'])
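As a quick sanity check of the header expansion (field values here are illustrative), formatting the template with a dict of config fields yields the sbatch preamble; extra keys in the dict are simply ignored by `str.format`, so passing the full `env_con.fields` is safe:

    import champc_lib.slurm_runner as slurm_runner

    fields = {'limit_hours': '12', 'ntasks': '1',
              'mail': 'user@example.edu', 'account': 'acct123'}   # illustrative values
    print(slurm_runner.header_fmtstr.format(**fields))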