#!/usr/bin/env python
import test_gordon_lib as lib
import sys
import os


def main(argv=None):
    """
    Usage is:
        submit.py [--account <chargecode>] [--url <url>] -- <command line to run>

    Run from the working dir of the job, which must contain (in addition to the
    job files) a file named scheduler.conf with scheduler properties for the job.
    <chargecode>, if present, gives the project to charge the job to.
    <url> is the url of the submitting website, including the taskid parameter.

    Returns 0 with "jobid=<jobid>" on stdout if the job was submitted ok.
    Returns 1 with a multiline error message on stdout on error.
    Returns 2 for the specific error of the queue limit being exceeded.
    """
    # COMMAND LINE PARSING
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--account', metavar="ACCOUNT", type=str, default=lib.account,
                        help="The account string to use when submitting jobs. "
                             "Default is read from config files.")
    parser.add_argument('--url', metavar="URL", dest="URL", type=str,
                        help="Notification URL")
    try:
        cmdline_options, cmdline = parser.parse_known_args(argv)
        # Everything after "--" (or after the program name) is the job command line.
        cmdline = cmdline[1:] if '--' not in cmdline else cmdline[cmdline.index('--') + 1:]
    except Exception as e:
        print("There was a problem submitting your job")
        print(e)
        sys.exit(1)
    account = cmdline_options.account
    url = cmdline_options.URL

    # cmdline is the job command line as a list (set above).
    tooltype = lib.getToolType(cmdline)

    # On Gordon, for the time being, we need to ignore the chargecode.
    account = lib.account

    scheduler_properties = lib.getProperties("scheduler.conf")
    print(scheduler_properties)
    scheduler_info = lib.schedulerInfo(scheduler_properties, tooltype)
    print(scheduler_info)

    # If this is a "direct" run type job we don't need to create a qsub script;
    # we just run batch_command.cmdline.
    if scheduler_info["is_direct"]:
        return lib.submitDirectJob(account, url, lib.email, lib.jobname, cmdline)

    runtime = int(scheduler_info["runtime"])
    useLocalDisk = False

    # Workaround for problems with file io on oasis and longer mrbayes runs.
    # Instead of running on oasis, we copy the working dir to the compute node's
    # local storage and copy the results back when the job completes. Since many
    # mrbayes jobs time out, we need a special trick to copy the results of jobs
    # that time out: right before we launch mrbayes, we launch a shell script in
    # the background that sleeps a few minutes less than the job's runtime and
    # then copies the results. If mrbayes terminates normally, the background
    # sleep is killed automatically.
    if tooltype == "mrbayes" and runtime > 60:
        useLocalDisk = True
    # I'm backing out the workaround by setting useLocalDisk to False.
    useLocalDisk = False

    # Write the command line to a file, batch_command.cmdline.
    rfile = open(lib.cmdfile, "w")
    # On Gordon we have to use bash, not sh.
    rfile.write("#!/bin/bash\n")
    rfile.writelines((" ".join(cmdline), "\n"))
    rfile.close()
    os.chmod(lib.cmdfile, 0o744)

    # Create the epilogue script.
    rfile = open(lib.epilogue, "w")
    text = """#!/bin/bash
date +'%%s %%a %%b %%e %%R:%%S %%Z %%Y' > %s/term.txt
echo "ExitCode=${10}" >> %s/term.txt
echo -e "Job Id: $1\\nResource List: $6\\nResources Used: $7\\nQueue Name: $8\\n" >> %s/term.txt""" % \
        (lib.jobdir, lib.jobdir, lib.jobdir)
    rfile.write(text)
    rfile.close()
    os.chmod(lib.epilogue, 0o744)
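    # For reference: under the standard TORQUE/PBS epilogue convention, the
    # epilogue is invoked with positional arguments (this mapping comes from the
    # TORQUE documentation; it is not verified by this script):
    #   $1 job id, $2 user name, $3 group name, $4 job name, $5 session id,
    #   $6 resource limits, $7 resources used, $8 queue name, $9 account,
    #   $10 job exit code
    # which is why the epilogue above reads $1, $6, $7, $8, and ${10}.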
    # Create the qsub script.
    rfile = open(lib.runfile, "w")
    # On Gordon we have to use bash, not sh.
    text = """#!/bin/bash
#PBS -q %s
#PBS -N %s
#PBS -l walltime=00:%d:00
#PBS -l epilogue=%s
#PBS -o scheduler_stdout.txt
#PBS -e scheduler_stderr.txt
#PBS -W umask=0007
##PBS -V
#PBS -v QOS=%d
#PBS -M %s
#PBS -m ae
#PBS -A %s
""" % (scheduler_info["queue"], lib.jobname, runtime, lib.epilogue,
       scheduler_info["qos"], lib.email, account)
    rfile.write(text)
    text = "#PBS -l nodes=%d:ppn=%d:noflash\n" % (scheduler_info["nodes"],
                                                  scheduler_info["ppn"])
    rfile.write(text)

    # Index the (shared, local) pair with the boolean: False -> 0 -> jobdir,
    # True -> 1 -> local_jobdir.
    rfile.write("cd %s\n" % (lib.jobdir, lib.local_jobdir)[useLocalDisk])

    if useLocalDisk:
        # Note that it's critical that newlines in the text string are all within
        # the double quotes; otherwise the echo command line would be split
        # across lines and make no sense.
        text = """"Due to filesystem problems intermediate results for longer mrbayes runs
will not be available while the job is running. The result files will be
available when mrbayes finishes. We're working to find a solution."
"""
        rfile.write("echo %s > %s/INTERMEDIATE_RESULTS_README.TXT\n" % (text, lib.jobdir))
        rfile.write("cp -r %s/* .\n" % lib.jobdir)
        # Start the background copy slightly before the job's walltime expires.
        sleepTime = runtime - 10
        rfile.write("sleep_cp.sh %s %s &\n" % (sleepTime, lib.jobdir))

    text = """
source /etc/profile.d/modules.sh
SDSC-gateway-submit-attributes $PBS_JOBID $(sed -n '/^email=/ {s/email=//; p;}' _JOBINFO.TXT)
export MODULEPATH=/home/diag/jpg/modulefiles/gordon/applications:$MODULEPATH
date +'%%s %%a %%b %%e %%R:%%S %%Z %%Y' > start.txt
curl %s\\&status=START
export CIPRES_THREADSPP=%d
export CIPRES_NP=%d
%s 1>stdout.txt 2>stderr.txt
retval=$?
date +'%%s %%a %%b %%e %%R:%%S %%Z %%Y' > done.txt
qstat -f $PBS_JOBID | grep Job
qstat -f $PBS_JOBID | grep resources
""" % (url, int(scheduler_info["threads_per_process"]),
       int(scheduler_info["mpi_processes"]), lib.cmdfile)
    rfile.write(text)

    if useLocalDisk:
        # Copy the node-local working directory back to the shared filesystem,
        # then swap the finished copy into place so partial results are never
        # left behind.
        text = """
echo "Job completed, starting to copy working directory."
echo "mkdir %s.complete"
mkdir %s.complete
echo "cp -r * %s.complete"
cp -r * %s.complete
echo "mv %s %s.sleep"
mv %s %s.sleep
echo "mv %s.complete %s"
mv %s.complete %s
echo "rm -rf %s.sleep"
rm -rf %s.sleep
echo "Finished copying working directory."
""" % ((lib.jobdir,) * 14)
        rfile.write(text)

    rfile.write("curl %s\\&status=DONE\n" % url)
    rfile.write("exit $retval")
    rfile.close()

    lib.log("./_JOBINFO.TXT", "\nChargeFactor=%f\ncores=%i" % (
        scheduler_info.get('ChargeFactor', 1.0),
        scheduler_info['nodes'] * scheduler_info['ppn']))

    return lib.submitJob()


if __name__ == "__main__":
    sys.exit(main())
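# A note on sleep_cp.sh (invoked above when useLocalDisk is set): the
# timeout-copy workaround assumes a small helper script on the PATH that sleeps
# and then copies the node-local working directory back to the shared
# filesystem. It is not part of this file; a minimal sketch, assuming its two
# arguments are minutes-to-sleep and a destination directory (matching how it
# is invoked above), might look like:
#
#   #!/bin/bash
#   # sleep_cp.sh <minutes> <dest_dir> -- hypothetical implementation
#   sleep $(( $1 * 60 ))   # sleepTime is passed in minutes
#   cp -r * "$2"           # rescue results just before walltime expires
#
# If the tool exits normally first, the job's shell exits and the scheduler
# tears down the background sleep, so no extra copy happens.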
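# Example invocation (hypothetical account, url, and command), run from a job
# working directory that contains scheduler.conf and the job's input files:
#
#   ./submit.py --account ABC123 --url 'https://example.org/cb?taskid=42' -- mb infile.nex
#
# On success this prints "jobid=<jobid>" and exits 0; on error it prints a
# multiline message and exits 1 (or 2 if the queue limit was exceeded).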