From ff71305a53715c1f2c608f5638df28744d572549 Mon Sep 17 00:00:00 2001
From: Brent Huisman
Date: Mon, 6 Jun 2016 10:56:27 +0200
Subject: [PATCH] ADDED: a version of the job submission script that does not copy the data-dir. Useful for large data-dirs

---
 cluster_tools/CMakeLists.txt                  |   3 +
 cluster_tools/check_tmp_cluster.sh            |  12 +++
 cluster_tools/gate_job_cluster_nomove.job     | 102 ++++++++++++++++++
 .../gate_run_submit_cluster_nomove.sh         | 101 +++++++++++++++++
 4 files changed, 218 insertions(+)
 create mode 100755 cluster_tools/check_tmp_cluster.sh
 create mode 100644 cluster_tools/gate_job_cluster_nomove.job
 create mode 100644 cluster_tools/gate_run_submit_cluster_nomove.sh

diff --git a/cluster_tools/CMakeLists.txt b/cluster_tools/CMakeLists.txt
index caeb178..76a4286 100644
--- a/cluster_tools/CMakeLists.txt
+++ b/cluster_tools/CMakeLists.txt
@@ -11,6 +11,9 @@ if(CLITK_BUILD_TOOLS)
     mergeDosePerEnergyFile.sh
     mergeStatFile.py
     mergeStatFile.sh
+    gate_run_submit_cluster_nomove.sh
+    gate_job_cluster_nomove.job
+    check_tmp_cluster.sh
     )
 
   install(FILES ${SCRIPTS} DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE)
diff --git a/cluster_tools/check_tmp_cluster.sh b/cluster_tools/check_tmp_cluster.sh
new file mode 100755
index 0000000..64f0007
--- /dev/null
+++ b/cluster_tools/check_tmp_cluster.sh
@@ -0,0 +1,12 @@
+#! /bin/bash
+# Usage: check_tmp_cluster.sh USER -- list the files owned by USER under /tmp on each listed host.
+for i in 1 $(seq 4 20) $(seq 30 36)
+do
+    echo
+    echo "Files on host linux${i}.dg.creatis.insa-lyon.fr "
+    #ssh -o "StrictHostKeyChecking no" linux${i}.dg.creatis.insa-lyon.fr "find /tmp -user $1 -exec rm -fr {} \; "
+    ssh -o "StrictHostKeyChecking no" linux${i}.dg.creatis.insa-lyon.fr "find /tmp -user $1 2>/dev/null"
+#ssh -o "StrictHostKeyChecking no" linux${i}.dg.creatis.insa-lyon.fr "rm -fr /tmp/tmp.*/core* /tmp/tmp.*/mac /tmp/tmp.*/data /tmp/tmp.*/output/*root"
+#ssh -o "StrictHostKeyChecking no" linux${i}.dg.creatis.insa-lyon.fr "mv /tmp/tmp.* data/gate/hybrid_analog_mc/liver/hybrid_emlivermore/rr/rayleigh/run.O6VN"
+done
+
diff --git a/cluster_tools/gate_job_cluster_nomove.job b/cluster_tools/gate_job_cluster_nomove.job
new file mode 100644
index 0000000..80d5b3d
--- /dev/null
+++ b/cluster_tools/gate_job_cluster_nomove.job
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Gate job that symlinks the data dir instead of copying it. Expected environment:
+# MACRODIR    directory that contains mac/ and data/
+# MACROFILE   macro file handed to Gate
+# RELEASEDIR  directory holding Gate and its libraries, or NONE to use the Gate found in PATH
+# OUTPUTDIR   directory that receives output.<job id>
+# INDEX       only echoed by this script
+# INDEXMAX    only echoed by this script
+# PARAM       extra arguments placed before MACROFILE on the Gate command line
+#
+#PBS -r n
+#PBS -l walltime=5:00:00
+#PBS -j oe
+#PBS -l mem=5Gb
+
+# Print an error message and abort the job with status 1.
+function error {
+echo "ERROR: $1"
+exit 1
+}
+
+function warning {
+echo "WARNING: $1"
+}
+
+test -f ${HOME}/.bashrc && echo "Sourcing bashrc" && source ${HOME}/.bashrc
+set -u
+
+echo "Checking"
+uname -a
+echo "MACRODIR=${MACRODIR:?"unknown MACRODIR"}"
+echo "MACROFILE=${MACROFILE:?"unknown MACROFILE"}"
+echo "RELEASEDIR=${RELEASEDIR:?"unknown RELEASEDIR"}"
+echo "OUTPUTDIR=${OUTPUTDIR:?"unknown OUTPUTDIR"}"
+echo "PBS_JOBID=${PBS_JOBID}"
+echo "INDEX=${INDEX}"
+echo "INDEXMAX=${INDEXMAX}"
+echo "PARAM=${PARAM}"
+
+if test "$RELEASEDIR" = "NONE"
+then
+    echo Using $(which Gate)
+    ldd $(which Gate)
+else
+    test -d "${RELEASEDIR}" || error "can't find release"
+    md5sum ${RELEASEDIR}/Gate
+    test -f ${RELEASEDIR}/libGate.so && md5sum ${RELEASEDIR}/libGate.so
+    # Build colon-terminated library lists; they are concatenated into LD_PRELOAD below.
+    echo "Finding libraries"
+    ROOTLIBS="${RELEASEDIR}/libCore.so:${RELEASEDIR}/libCint.so:${RELEASEDIR}/libRIO.so:${RELEASEDIR}/libNet.so:${RELEASEDIR}/libHist.so:${RELEASEDIR}/libGraf.so:${RELEASEDIR}/libGraf3d.so:${RELEASEDIR}/libGpad.so:${RELEASEDIR}/libTree.so:${RELEASEDIR}/libRint.so:${RELEASEDIR}/libPostscript.so:${RELEASEDIR}/libMatrix.so:${RELEASEDIR}/libPhysics.so:${RELEASEDIR}/libMathCore.so:${RELEASEDIR}/libThread.so:"
+    echo "ROOTLIBS=${ROOTLIBS}"
+    G4LIBS="$(for library in $(find "${RELEASEDIR}" -maxdepth 1 -name 'libG4*.so'); do echo -n "${library}:"; done)"
+    echo "G4LIBS=${G4LIBS}"
+    CLHEPLIBS="$(for library in $(find "${RELEASEDIR}" -maxdepth 1 -name 'libCLHEP*.so'); do echo -n "${library}:"; done)"
+    echo "CLHEPLIBS=${CLHEPLIBS}"
+    GATELIBS=""
+    test -f ${RELEASEDIR}/libGate.so && GATELIBS="${RELEASEDIR}/libGate.so:"
+    echo "GATELIBS=${GATELIBS}"
+fi
+test -d "${MACRODIR}" && test -d "${MACRODIR}/mac" || error "invalid macro"
+# Run inside a fresh mktemp directory: mac/ is copied there, data/ is only symlinked.
+# On signals 1 2 3 15 the trap moves output to OUTPUTDIR, removes the temp dir and exits 1.
+echo "Copying/linking inputs"
+LOCALMACRODIR=$(mktemp -d)
+trap "mv output ${OUTPUTDIR}/output.${PBS_JOBID%%.*} ; rm -rf ${LOCALMACRODIR} ; exit 1" 1 2 3 15
+cd ${LOCALMACRODIR}
+## below line seems not to be trapped correctly
+cp -r -L "${MACRODIR}"/mac .
+ln -s "${MACRODIR}"/data ./data
+mkdir output
+
+# Enforce one thread (exported, otherwise the Gate child process never sees the variable)
+export ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=1
+
+echo "Lauching macro"
+date
+if test "$RELEASEDIR" = "NONE"
+then
+    Gate ${PARAM} ${MACROFILE} || error "gate failed"
+else
+    LD_PRELOAD="${ROOTLIBS}${G4LIBS}${CLHEPLIBS}${GATELIBS}" \
+    G4LEVELGAMMADATA="${RELEASEDIR}/PhotonEvaporation2.1" \
+    G4RADIOACTIVEDATA="${RELEASEDIR}/RadioactiveDecay3.3" \
+    G4LEDATA="${RELEASEDIR}/G4EMLOW6.19" \
+    G4NEUTRONHPDATA="${RELEASEDIR}/G4NDL3.14" \
+    G4ABLADATA="${RELEASEDIR}/G4ABLA3.0" \
+    G4REALSURFACEDATA="${RELEASEDIR}/RealSurface1.0" \
+    G4NEUTRONXSDATA="${RELEASEDIR}/G4NEUTRONXS1.0" \
+    G4PIIDATA="${RELEASEDIR}/G4PII1.2" \
+    /usr/bin/time --format="real %es\nuser %Us\nsys %Ss\nmaxmem %Mk" \
+    ${RELEASEDIR}/Gate ${PARAM} ${MACROFILE} \
+    || error "gate failed"
+fi
+
+echo "Copying data back"
+mv output "${OUTPUTDIR}/output.${PBS_JOBID%%.*}"
+
+echo "Cleanup"
+unlink "${LOCALMACRODIR}"/data
+rm -rf "${LOCALMACRODIR}"
+
+echo "Success!!!"
diff --git a/cluster_tools/gate_run_submit_cluster_nomove.sh b/cluster_tools/gate_run_submit_cluster_nomove.sh
new file mode 100644
index 0000000..47ad699
--- /dev/null
+++ b/cluster_tools/gate_run_submit_cluster_nomove.sh
@@ -0,0 +1,101 @@
+#! /bin/bash
+# Submit njobs runs of gate_job_cluster_nomove.job with qsub, or run them in background when qsub is absent.
+set -u
+SCRIPTNAME="$(basename "${0}")"
+
+# -------------------------------------------------
+function error {
+    echo "ERROR: $1"
+    usage
+    exit 1
+}
+# -------------------------------------------------
+
+DEFAULTRELEASESUFFIX="NONE"
+DEFAULTNUMBEROFJOBS="10"
+
+# -------------------------------------------------
+function usage {
+    echo "${SCRIPTNAME} mac/main.mac njobs releasesuffix paramtogate"
+    echo "default njobs = ${DEFAULTNUMBEROFJOBS}"
+    echo "default releasesuffix = ${DEFAULTRELEASESUFFIX} (NONE means use Gate in PATH)"
+    echo "default paramtogate = \"\" (use \"\" around params and \\ in front of commas)"
+}
+# -------------------------------------------------
+
+test $# -eq 0 && usage && exit 0
+
+RELEASESUFFIX=${3:-"${DEFAULTRELEASESUFFIX}"}
+RELEASEDIR="${HOME}/releases/grid_release${RELEASESUFFIX}"
+JOBFILE="$(dirname $0)/gate_job_cluster_nomove.job"
+
+echo "Checking stuff"
+test -f ${JOBFILE} || error "can't find job file ${JOBFILE}"
+if test "${RELEASESUFFIX}" = "${DEFAULTRELEASESUFFIX}"
+then
+    RELEASEDIR="NONE"
+    which Gate >/dev/null 2>&1 || error "there is no Gate in the PATH"
+else
+    test -d ${RELEASEDIR} || error "invalid release dir ${RELEASEDIR}"
+fi
+MACRODIR=$(pwd)
+test -d ${MACRODIR}/mac && test -d ${MACRODIR}/data || error "invalid path"
+MACROFILE=${1:?"provide relative macro path"}
+test -f ${MACRODIR}/${MACROFILE} || error "invalid macro"
+OUTPUTDIR=$(mktemp --tmpdir=${MACRODIR} -d run.XXXX || error "can't create temp dir")
+test -d ${OUTPUTDIR} || error "can't locate output dir"
+RUNID=${OUTPUTDIR##*.}
+NJOBS=${2:-"${DEFAULTNUMBEROFJOBS}"}
+NJOBSMAX=${NJOBS}
+PARAM="${4:-\"\"}"
+
+echo "Lets roll!!"
+echo "runid is ${RUNID}"
+
+QSUB=$(which qsub 2> /dev/null)
+# echo "qsub is $(which qsub)"
+test -z "${QSUB}" && QSUB="noqsub"
+if test "${QSUB}" = "noqsub"
+then
+    echo "qsub is not found. Simply run Gate on multiple cores."
+fi
+
+test -z "${PARAM}" && echo "no param" || echo "param is ${PARAM}"
+if test "$RELEASESUFFIX" = "$DEFAULTRELEASESUFFIX"
+then
+    echo "Gate is $(which Gate)"
+else
+    echo "Gate release is $(basename ${RELEASEDIR})"
+fi
+echo "submitting ${NJOBS} jobs"
+
+PARAMFILE="${OUTPUTDIR}/params.txt"
+echo "njobs = ${NJOBS}" >> "${PARAMFILE}"
+echo "macro = ${MACROFILE}" >> "${PARAMFILE}"
+test -z "${PARAM}" || echo "param = ${PARAM}" >> "${PARAMFILE}"
+
+# Copy the main macro and the files named on its control/execute lines (for log)
+mkdir ${OUTPUTDIR}/mac
+cp ${MACROFILE} ${OUTPUTDIR}/mac
+files=`grep "control/execute" ${MACROFILE} | cut -d " " -f 2`
+for i in $files
+do
+    cp $i ${OUTPUTDIR}/mac
+done
+# Launch one job per index, counting down from NJOBS to 1.
+while test $NJOBS -gt 0; do
+
+    if test "${QSUB}" = "noqsub"
+    then
+        echo "Launching Gate log in ${OUTPUTDIR}/gate_${NJOBS}.log"
+        PARAM=\"${PARAM}\" INDEX=${NJOBS} INDEXMAX=${NJOBSMAX} OUTPUTDIR=${OUTPUTDIR} RELEASEDIR=${RELEASEDIR} MACROFILE=${MACROFILE} MACRODIR=${MACRODIR} PBS_JOBID="local_${NJOBS}" bash "${JOBFILE}" > ${OUTPUTDIR}/gate_${NJOBS}.log &
+    else
+        qsub -N "gatejob.${RUNID}" -o "${OUTPUTDIR}" \
+            -v "PARAM=${PARAM},INDEX=${NJOBS},INDEXMAX=${NJOBSMAX},OUTPUTDIR=${OUTPUTDIR},RELEASEDIR=${RELEASEDIR},MACROFILE=${MACROFILE},MACRODIR=${MACRODIR}" \
+            "${JOBFILE}" || error "submission error"
+    fi
+
+    let NJOBS--
+done
+
+echo "runid is ${RUNID}"
+-- 
+2.45.1